From b7c33ca5b9e2302a6459630371f224ef41d0ff80 Mon Sep 17 00:00:00 2001
From: HongCheng
Date: Mon, 22 Apr 2024 17:37:09 +0900
Subject: [PATCH] update: stop using the online flag and the model_path parameter

---
 app.py             |  2 +-
 web_demo-Llama3.py | 20 --------------------
 2 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/app.py b/app.py
index 604ea04..d0b5b4b 100644
--- a/app.py
+++ b/app.py
@@ -13,6 +13,6 @@ elif model == "EmoLLM_Model":
 elif model == "Llama3_Model":
     os.system("python download_model.py chg0901/EmoLLM-Llama3-8B-Instruct2.0")
     # os.system('streamlit run web_demo-Llama3_online.py --server.address=0.0.0.0 --server.port 7860')
-    os.system('streamlit run web_demo-Llama3_online.py --server.address=0.0.0.0 --server.port 7968')
+    os.system('streamlit run web_demo-Llama3.py --server.address=0.0.0.0 --server.port 7968')
 else:
     print("Please select one model")
\ No newline at end of file

diff --git a/web_demo-Llama3.py b/web_demo-Llama3.py
index 216eb0d..8e6501d 100644
--- a/web_demo-Llama3.py
+++ b/web_demo-Llama3.py
@@ -25,14 +25,6 @@ if not os.path.isdir("model"):
     print("[ERROR] not find model dir")
     exit(0)
 
-# online = True
-
-## running on local to test online function
-# if online:
-#     from openxlab.model import download
-#     download(model_repo='chg0901/EmoLLM-Llama3-8B-Instruct2.0',
-#             output='model')
-
 @dataclass
 class GenerationConfig:
     # this config is used for chat to provide more diversity
@@ -298,19 +290,7 @@ def main():
     st.markdown("我在这里,准备好倾听你的心声了。", unsafe_allow_html=True)
     model_name_or_path = 'model'
     adapter_name_or_path = None
-    # if online:
-    #     model_name_or_path = 'model'
-    #     adapter_name_or_path = None
-    # else:
-    #     # model_name_or_path = "./xtuner_config/merged_Llama3_8b_instruct_e3"
-    #     # adapter_name_or_path = './xtuner_config/hf_llama3_e1_sc2'
-
-    #     model_name_or_path = "./xtuner_config/merged_Llama3_8b_instruct_e1_sc"
-    #     adapter_name_or_path = None
-    # 若开启4bit推理能够节省很多显存,但效果可能下降
-    load_in_4bit = False  # True  # 6291MiB
-    # torch.cuda.empty_cache()
 
     print('load model begin.')
     # 加载模型
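
For reference, the commented-out branch removed from web_demo-Llama3.py fetched the
weights from OpenXLab at demo startup whenever the online flag was set. After this
patch that step happens once in app.py via download_model.py, and the demo only reads
the local model directory. A minimal sketch of the deleted behavior, with the
download() call and repo name taken verbatim from the removed comments:

    # Reconstruction of the deleted startup-download branch, for reference only.
    # The demo now assumes ./model has already been populated by download_model.py.
    from openxlab.model import download

    download(model_repo='chg0901/EmoLLM-Llama3-8B-Instruct2.0',
             output='model')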
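
The second hunk also drops the load_in_4bit switch. The deleted Chinese comment notes
that 4-bit inference saves a lot of GPU memory (the inline figure is 6291MiB) at a
possible cost in output quality. Should the option be needed again, one plausible way
to restore it is through the transformers quantization config; the actual load call is
outside this diff, so the arguments below are assumptions rather than the project's
real code:

    # Hypothetical re-enabling of 4-bit loading via transformers + bitsandbytes.
    # 'model' matches the directory the demo checks; everything else is an
    # assumption about how the (unshown) load call could be written.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,                     # quantize weights to 4 bit at load time
        bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
    )

    tokenizer = AutoTokenizer.from_pretrained('model', trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        'model',
        quantization_config=quant_config,
        device_map='auto',                     # requires accelerate to be installed
        trust_remote_code=True,
    )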