[ADD] add evaluation result of base model on 5/10 epochs (#155)

2024-03-28 17:46:08 +08:00 · 2024-03-28 17:46:08 +08:00 · 441a13928a
commit 441a13928a
parent 3bead46e66 21e81298a3
7 changed files with 26 additions and 4 deletions
--- a/evaluate/General_evaluation.md
+++ b/evaluate/General_evaluation.md
@@ -48,3 +48,5 @@ pip install torch transformers datasets nltk rouge jieba
 | Qwen1_5-0_5B-chat | 27.23%  | 8.55%   | 17.05%  | 26.65%  | 13.11%  | 7.19%   | 4.05%   |
 | InternLM2_7B_chat_qlora | 37.86%  | 15.23%   | 24.34%  | 39.71%  | 22.66%  | 14.26%   | 9.21%   |
 | InternLM2_7B_chat_full  | 32.45%  | 10.82%   | 20.17%  | 30.48%  | 15.67%  | 8.84%   | 5.02%   |
+| InternLM2_7B_base_qlora_5epoch  | 41.94%  | 20.21%   | 29.67%  | 42.98%  | 27.07%  | 19.33%   | 14.62%   |
+| InternLM2_7B_base_qlora_10epoch | 43.47%  | 22.06%   | 31.40%  | 44.81%  | 29.15%  | 21.44%   | 16.72%   |
--- a/evaluate/General_evaluation_EN.md
+++ b/evaluate/General_evaluation_EN.md
@@ -48,3 +48,5 @@ Test the data in data.json with the following results:
 | Qwen1_5-0_5B-chat | 27.23%  | 8.55%   | 17.05%  | 26.65%  | 13.11%  | 7.19%   | 4.05%   |
 | InternLM2_7B_chat_qlora | 37.86%  | 15.23%   | 24.34%  | 39.71%  | 22.66%  | 14.26%   | 9.21%   |
 | InternLM2_7B_chat_full  | 32.45%  | 10.82%   | 20.17%  | 30.48%  | 15.67%  | 8.84%   | 5.02%   |
+| InternLM2_7B_base_qlora_5epoch  | 41.94%  | 20.21%   | 29.67%  | 42.98%  | 27.07%  | 19.33%   | 14.62%   |
+| InternLM2_7B_base_qlora_10epoch | 43.47%  | 22.06%   | 31.40%  | 44.81%  | 29.15%  | 21.44%   | 16.72%   |
--- a/evaluate/README.md
+++ b/evaluate/README.md
@@ -9,6 +9,8 @@
 | Qwen1_5-0_5B-chat | 27.23%  | 8.55%   | 17.05%  | 26.65%  | 13.11%  | 7.19%   | 4.05%   |
 | InternLM2_7B_chat_qlora  | 37.86%  | 15.23%   | 24.34%  | 39.71%  | 22.66%  | 14.26%   | 9.21%   |
 | InternLM2_7B_chat_full  | 32.45%  | 10.82%   | 20.17%  | 30.48%  | 15.67%  | 8.84%   | 5.02%   |
+| InternLM2_7B_base_qlora_5epoch  | 41.94%  | 20.21%   | 29.67%  | 42.98%  | 27.07%  | 19.33%   | 14.62%   |
+| InternLM2_7B_base_qlora_10epoch | 43.47%  | 22.06%   | 31.40%  | 44.81%  | 29.15%  | 21.44%   | 16.72%   |

 ## 专业指标评测

--- a/evaluate/README_EN.md
+++ b/evaluate/README_EN.md
@@ -9,6 +9,8 @@
 | Qwen1_5-0_5B-chat | 27.23%  | 8.55%   | 17.05%  | 26.65%  | 13.11%  | 7.19%   | 4.05%   |
 | InternLM2_7B_chat_qlora | 37.86%  | 15.23%   | 24.34%  | 39.71%  | 22.66%  | 14.26%   | 9.21%   |
 | InternLM2_7B_chat_full  | 32.45%  | 10.82%   | 20.17%  | 30.48%  | 15.67%  | 8.84%   | 5.02%   |
+| InternLM2_7B_base_qlora_5epoch  | 41.94%  | 20.21%   | 29.67%  | 42.98%  | 27.07%  | 19.33%   | 14.62%   |
+| InternLM2_7B_base_qlora_10epoch | 43.47%  | 22.06%   | 31.40%  | 44.81%  | 29.15%  | 21.44%   | 16.72%   |

 ## Professional Metrics Evaluation

--- a/scripts/qa_generation/QA_clean.py
+++ b/scripts/qa_generation/QA_clean.py
@@ -101,7 +101,7 @@ def clean_qa(
                    future.result()
                except Exception as exc:
                    logger.error("Thread generated an exception: %s" % (exc))
-
+        
        merge_sub_qa_generation(result_dir, storage_jsonl_path)


--- a/scripts/qa_generation/config/config.py
+++ b/scripts/qa_generation/config/config.py
@@ -12,7 +12,7 @@ model_dir = os.path.join(base_dir, 'model')                                 # mo
 # data
 data_dir = os.path.join(base_dir, 'data')
 clean_dir = os.path.join(data_dir, 'cleaned')
-judge_dir = os.path.join(data_dir, '数据整合')
+judge_dir = os.path.join(data_dir, 'generated')
 result_dir = os.path.join(data_dir, 'generated')                            # result

 # log
@@ -29,7 +29,7 @@ wash_prompt_file_path = os.path.join(base_dir, 'choose_prompt.md')
 环境变量
 """
 # api-keys
-DASHSCOPE_API_KEY = ''
+DASHSCOPE_API_KEY = os.environ.get('DASHSCOPE_API_KEY', '')  # read from the environment; never commit a real key


 """
--- a/scripts/qa_generation/util/data_loader.py
+++ b/scripts/qa_generation/util/data_loader.py
@@ -117,6 +117,20 @@ def save_to_file(storage_jsonl_path, storage_list):
        for item in storage_list:
            f.write(json.dumps(item, ensure_ascii=False) + '\n')

+import time
+import os
+
+def safe_remove(file_path, max_attempts=5, delay=1):
+    for attempt in range(max_attempts):
+        try:
+            os.remove(file_path)
+            print(f"File {file_path} successfully deleted.")
+            break
+        except PermissionError as e:
+            print(f"Attempt {attempt+1}: Unable to delete {file_path} - {str(e)}")
+            time.sleep(delay)
+    else:
+        print(f"Failed to delete {file_path} after {max_attempts} attempts.")

 """
 将并发产生的文件合并成为一个文件
@@ -131,5 +145,5 @@ def merge_sub_qa_generation(directory, storage_jsonl_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                file_contents.append(json.loads(line))
-            os.remove(file_path)
+            # safe_remove(file_path)
    save_to_file(storage_jsonl_path, file_contents)