Merge pull request #88 from chg0901/patch-8

Update qwen_gen_data_NoBash.py 修改生成数量和保存间隔
This commit is contained in:
xzw 2024-03-16 23:20:17 +08:00 committed by GitHub
commit 025061b0ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -54,6 +54,9 @@ if __name__ == '__main__':
emotions_lis = configs['emotions_list'] emotions_lis = configs['emotions_list']
areas_of_life = configs['areas_of_life'] areas_of_life = configs['areas_of_life']
ai_tool = 'qwen' ai_tool = 'qwen'
save_interval = 5
total_num_each_emo_area = 5
conversation_lis = [] conversation_lis = []
@@ -61,7 +64,7 @@ if __name__ == '__main__':
for emo in emotions_lis: for emo in emotions_lis:
gen_path = f'./{ai_tool}/{area}/{emo}.jsonl' gen_path = f'./{ai_tool}/{area}/{emo}.jsonl'
for i in tqdm(range(100), desc='{emo}, {area}'.format(emo=emo, area=area)): for i in tqdm(range(total_num_each_emo_area), desc='{emo}, {area}'.format(emo=emo, area=area)):
one_conversation = { one_conversation = {
"conversation": [] "conversation": []
} }
@@ -98,8 +101,7 @@ if __name__ == '__main__':
) )
conversation_lis.append(one_conversation) conversation_lis.append(one_conversation)
# 每生成10条数据存储一次 if ((i+1) % save_interval == 0):
if ((i+1) % 10 == 0):
save_jsonl(data_lis=conversation_lis, file_path=gen_path) save_jsonl(data_lis=conversation_lis, file_path=gen_path)
print(f'generate {gen_path}') print(f'generate {gen_path}')
conversation_lis = [] # 清空 conversation_lis = [] # 清空