diff --git a/evaluate/General_evaluation.md b/evaluate/General_evaluation.md index 191392e..71d6b32 100644 --- a/evaluate/General_evaluation.md +++ b/evaluate/General_evaluation.md @@ -48,3 +48,5 @@ pip install torch transformers datasets nltk rouge jieba | Qwen1_5-0_5B-chat | 27.23% | 8.55% | 17.05% | 26.65% | 13.11% | 7.19% | 4.05% | | InternLM2_7B_chat_qlora | 37.86% | 15.23% | 24.34% | 39.71% | 22.66% | 14.26% | 9.21% | | InternLM2_7B_chat_full | 32.45% | 10.82% | 20.17% | 30.48% | 15.67% | 8.84% | 5.02% | +| InternLM2_7B_base_qlora_5epoch | 41.94% | 20.21% | 29.67% | 42.98% | 27.07% | 19.33% | 14.62% | +| InternLM2_7B_base_qlora_10epoch | 43.47% | 22.06% | 31.40% | 44.81% | 29.15% | 21.44% | 16.72% | \ No newline at end of file diff --git a/evaluate/General_evaluation_EN.md b/evaluate/General_evaluation_EN.md index 5de1b94..39b2a55 100644 --- a/evaluate/General_evaluation_EN.md +++ b/evaluate/General_evaluation_EN.md @@ -48,3 +48,5 @@ Test the data in data.json with the following results: | Qwen1_5-0_5B-chat | 27.23% | 8.55% | 17.05% | 26.65% | 13.11% | 7.19% | 4.05% | | InternLM2_7B_chat_qlora | 37.86% | 15.23% | 24.34% | 39.71% | 22.66% | 14.26% | 9.21% | | InternLM2_7B_chat_full | 32.45% | 10.82% | 20.17% | 30.48% | 15.67% | 8.84% | 5.02% | +| InternLM2_7B_base_qlora_5epoch | 41.94% | 20.21% | 29.67% | 42.98% | 27.07% | 19.33% | 14.62% | +| InternLM2_7B_base_qlora_10epoch | 43.47% | 22.06% | 31.40% | 44.81% | 29.15% | 21.44% | 16.72% | diff --git a/evaluate/README.md b/evaluate/README.md index 1cf92ed..d42a3c9 100644 --- a/evaluate/README.md +++ b/evaluate/README.md @@ -9,6 +9,8 @@ | Qwen1_5-0_5B-chat | 27.23% | 8.55% | 17.05% | 26.65% | 13.11% | 7.19% | 4.05% | | InternLM2_7B_chat_qlora | 37.86% | 15.23% | 24.34% | 39.71% | 22.66% | 14.26% | 9.21% | | InternLM2_7B_chat_full | 32.45% | 10.82% | 20.17% | 30.48% | 15.67% | 8.84% | 5.02% | +| InternLM2_7B_base_qlora_5epoch | 41.94% | 20.21% | 29.67% | 42.98% | 27.07% | 19.33% | 14.62% | +| 
InternLM2_7B_base_qlora_10epoch | 43.47% | 22.06% | 31.40% | 44.81% | 29.15% | 21.44% | 16.72% | ## 专业指标评测 diff --git a/evaluate/README_EN.md b/evaluate/README_EN.md index cbb8aa9..b46b0ce 100644 --- a/evaluate/README_EN.md +++ b/evaluate/README_EN.md @@ -9,6 +9,8 @@ | Qwen1_5-0_5B-chat | 27.23% | 8.55% | 17.05% | 26.65% | 13.11% | 7.19% | 4.05% | | InternLM2_7B_chat_qlora | 37.86% | 15.23% | 24.34% | 39.71% | 22.66% | 14.26% | 9.21% | | InternLM2_7B_chat_full | 32.45% | 10.82% | 20.17% | 30.48% | 15.67% | 8.84% | 5.02% | +| InternLM2_7B_base_qlora_5epoch | 41.94% | 20.21% | 29.67% | 42.98% | 27.07% | 19.33% | 14.62% | +| InternLM2_7B_base_qlora_10epoch | 43.47% | 22.06% | 31.40% | 44.81% | 29.15% | 21.44% | 16.72% | ## Professional Metrics Evaluation diff --git a/scripts/qa_generation/QA_clean.py b/scripts/qa_generation/QA_clean.py index 46f0123..417f975 100644 --- a/scripts/qa_generation/QA_clean.py +++ b/scripts/qa_generation/QA_clean.py @@ -101,7 +101,7 @@ def clean_qa( future.result() except Exception as exc: logger.error("Thread generated an exception: %s" % (exc)) - + merge_sub_qa_generation(result_dir, storage_jsonl_path) diff --git a/scripts/qa_generation/config/config.py b/scripts/qa_generation/config/config.py index d3f9dfc..37c5cd5 100644 --- a/scripts/qa_generation/config/config.py +++ b/scripts/qa_generation/config/config.py @@ -12,7 +12,7 @@ model_dir = os.path.join(base_dir, 'model') # mo # data data_dir = os.path.join(base_dir, 'data') clean_dir = os.path.join(data_dir, 'cleaned') -judge_dir = os.path.join(data_dir, '数据整合') +judge_dir = os.path.join(data_dir, 'generated') result_dir = os.path.join(data_dir, 'generated') # result # log @@ -29,7 +29,7 @@ wash_prompt_file_path = os.path.join(base_dir, 'choose_prompt.md') 环境变量 """ # api-keys -DASHSCOPE_API_KEY = '' +DASHSCOPE_API_KEY = os.environ.get('DASHSCOPE_API_KEY', '') """ diff --git a/scripts/qa_generation/util/data_loader.py b/scripts/qa_generation/util/data_loader.py index 5e940dc..a4bb70c 100644 
def safe_remove(file_path, max_attempts=5, delay=1):
    """Delete ``file_path``, retrying while the OS refuses permission.

    A ``PermissionError`` from ``os.remove`` is treated as transient and the
    deletion is retried up to ``max_attempts`` times.

    Args:
        file_path: Path of the file to delete.
        max_attempts: Maximum number of deletion attempts. Values below 1
            mean no attempt is made and the call reports failure.
        delay: Seconds to wait between two consecutive attempts.

    Returns:
        True if the file no longer exists when the function returns (it was
        deleted, or it was already absent); False if every attempt was
        rejected with ``PermissionError``.
    """
    # Function-scope imports keep the helper self-contained instead of
    # relying on import statements placed in the middle of the module.
    import os
    import time

    for attempt in range(1, max_attempts + 1):
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # Nothing left to delete: the caller's goal is already met, so
            # report success instead of propagating the exception.
            print(f"File {file_path} does not exist; nothing to delete.")
            return True
        except PermissionError as e:
            print(f"Attempt {attempt}: Unable to delete {file_path} - {str(e)}")
            # Only wait when another attempt will follow; sleeping after the
            # final failure would just delay the failure report.
            if attempt < max_attempts:
                time.sleep(delay)
        else:
            print(f"File {file_path} successfully deleted.")
            return True

    print(f"Failed to delete {file_path} after {max_attempts} attempts.")
    return False