From 8b3c439717ac726185662a392e5f071692e8d6ca Mon Sep 17 00:00:00 2001 From: HongCheng Date: Sun, 17 Mar 2024 00:18:24 +0900 Subject: [PATCH] =?UTF-8?q?Update=20qwen=5Fgen=5Fdata=5FNoBash.py=20?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E7=94=9F=E6=88=90=E6=95=B0=E9=87=8F=E5=92=8C?= =?UTF-8?q?=E4=BF=9D=E5=AD=98=E9=97=B4=E9=9A=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- generate_data/qwen_gen_data_NoBash.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/generate_data/qwen_gen_data_NoBash.py b/generate_data/qwen_gen_data_NoBash.py index e3826aa..6e13374 100644 --- a/generate_data/qwen_gen_data_NoBash.py +++ b/generate_data/qwen_gen_data_NoBash.py @@ -54,6 +54,9 @@ if __name__ == '__main__': emotions_lis = configs['emotions_list'] areas_of_life = configs['areas_of_life'] ai_tool = 'qwen' + + save_interval = 5 + total_num_each_emo_area = 5 conversation_lis = [] @@ -61,7 +64,7 @@ if __name__ == '__main__': for emo in emotions_lis: gen_path = f'./{ai_tool}/{area}/{emo}.jsonl' - for i in tqdm(range(100), desc='{emo}, {area}'.format(emo=emo, area=area)): + for i in tqdm(range(total_num_each_emo_area), desc='{emo}, {area}'.format(emo=emo, area=area)): one_conversation = { "conversation": [] } @@ -98,8 +101,7 @@ if __name__ == '__main__': ) conversation_lis.append(one_conversation) - # 每生成10条数据存储一次 - if ((i+1) % 10 == 0): + if ((i+1) % save_interval == 0): save_jsonl(data_lis=conversation_lis, file_path=gen_path) print(f'generate {gen_path}') conversation_lis = [] # 清空