diff --git a/app.py b/app.py index 604ea04..d0b5b4b 100644 --- a/app.py +++ b/app.py @@ -13,6 +13,6 @@ elif model == "EmoLLM_Model": elif model == "Llama3_Model": os.system("python download_model.py chg0901/EmoLLM-Llama3-8B-Instruct2.0") # os.system('streamlit run web_demo-Llama3_online.py --server.address=0.0.0.0 --server.port 7860') - os.system('streamlit run web_demo-Llama3_online.py --server.address=0.0.0.0 --server.port 7968') + os.system('streamlit run web_demo-Llama3.py --server.address=0.0.0.0 --server.port 7968') else: print("Please select one model") \ No newline at end of file diff --git a/web_demo-Llama3.py b/web_demo-Llama3.py index 216eb0d..8e6501d 100644 --- a/web_demo-Llama3.py +++ b/web_demo-Llama3.py @@ -25,14 +25,6 @@ if not os.path.isdir("model"): print("[ERROR] not find model dir") exit(0) -# online = True - -## running on local to test online function -# if online: -# from openxlab.model import download -# download(model_repo='chg0901/EmoLLM-Llama3-8B-Instruct2.0', -# output='model') - @dataclass class GenerationConfig: # this config is used for chat to provide more diversity @@ -298,19 +290,7 @@ def main(): st.markdown("我在这里,准备好倾听你的心声了。", unsafe_allow_html=True) model_name_or_path = 'model' adapter_name_or_path = None - # if online: - # model_name_or_path = 'model' - # adapter_name_or_path = None - # else: - # # model_name_or_path = "./xtuner_config/merged_Llama3_8b_instruct_e3" - # # adapter_name_or_path = './xtuner_config/hf_llama3_e1_sc2' - - # model_name_or_path = "./xtuner_config/merged_Llama3_8b_instruct_e1_sc" - # adapter_name_or_path = None - # 若开启4bit推理能够节省很多显存,但效果可能下降 - load_in_4bit = False # True # 6291MiB - # torch.cuda.empty_cache() print('load model begin.') # 加载模型