update: do not use the online flag and model_path parameter

HongCheng 2024-04-22 17:37:09 +09:00
parent 9f0ea20d43
commit b7c33ca5b9
2 changed files with 1 addition and 21 deletions

app.py

@@ -13,6 +13,6 @@ elif model == "EmoLLM_Model":
 elif model == "Llama3_Model":
     os.system("python download_model.py chg0901/EmoLLM-Llama3-8B-Instruct2.0")
     # os.system('streamlit run web_demo-Llama3_online.py --server.address=0.0.0.0 --server.port 7860')
-    os.system('streamlit run web_demo-Llama3_online.py --server.address=0.0.0.0 --server.port 7968')
+    os.system('streamlit run web_demo-Llama3.py --server.address=0.0.0.0 --server.port 7968')
 else:
     print("Please select one model")


@@ -25,14 +25,6 @@ if not os.path.isdir("model"):
     print("[ERROR] not find model dir")
     exit(0)
-# online = True
-## running on local to test online function
-# if online:
-#     from openxlab.model import download
-#     download(model_repo='chg0901/EmoLLM-Llama3-8B-Instruct2.0',
-#              output='model')
 @dataclass
 class GenerationConfig:
     # this config is used for chat to provide more diversity
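The block removed above was the commented-out "online" path, which pulled the merged weights from OpenXLab at startup instead of relying on a pre-populated model directory. As a hedged reconstruction based only on the removed comments (the openxlab.model.download call with model_repo and output is taken directly from them), that path would have looked roughly like this:

# Reconstruction of the removed "online" branch; not part of the current code.
online = False  # was only enabled to test the online download flow locally

if online:
    from openxlab.model import download
    # Download the merged EmoLLM Llama3 weights from OpenXLab into ./model
    download(model_repo='chg0901/EmoLLM-Llama3-8B-Instruct2.0',
             output='model')

After this commit the demo always loads from the local model directory, which is why the flag and the alternate model_path values in the next hunk were dropped as well.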
@@ -298,18 +290,6 @@ def main():
     st.markdown("我在这里,准备好倾听你的心声了。", unsafe_allow_html=True)
     model_name_or_path = 'model'
     adapter_name_or_path = None
-    # if online:
-    #     model_name_or_path = 'model'
-    #     adapter_name_or_path = None
-    # else:
-    #     # model_name_or_path = "./xtuner_config/merged_Llama3_8b_instruct_e3"
-    #     # adapter_name_or_path = './xtuner_config/hf_llama3_e1_sc2'
-    #     model_name_or_path = "./xtuner_config/merged_Llama3_8b_instruct_e1_sc"
-    #     adapter_name_or_path = None
-    # 若开启4bit推理能够节省很多显存但效果可能下降
     load_in_4bit = False # True # 6291MiB
     # torch.cuda.empty_cache()
     print('load model begin.')
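The removed Chinese comment above says that enabling 4-bit inference saves a lot of GPU memory but may degrade output quality, which is what the retained load_in_4bit flag (and the 6291MiB note) refers to. A minimal sketch of that idea, assuming the model is loaded with Hugging Face transformers plus bitsandbytes; the file's actual loading code is not shown in this diff:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name_or_path = 'model'  # local directory populated by download_model.py
load_in_4bit = False          # True trades some quality for much lower VRAM use

# Only build a quantization config when 4-bit loading is requested.
quant_config = BitsAndBytesConfig(load_in_4bit=True) if load_in_4bit else None

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    quantization_config=quant_config,  # None -> plain half-precision load
    torch_dtype=torch.float16,
    device_map='auto',
    trust_remote_code=True,
)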