# Convert cleaned QA .jsonl files (question/answer records) into .json files
# of prompt/completion records.
import os
|
|
import json
|
|
|
|
# Root directory holding the cleaned QA .jsonl files. The path is relative to
# the current working directory (note the leading '..'), and the conversion
# loop at the bottom of this file searches it recursively.
directory_path = '../初步清洗的QA数据'
|
|
|
|
|
|
def convert_to_desired_format(input_file, output_file):
    """Convert a JSON Lines QA file into a JSON array of prompt/completion pairs.

    Each non-blank line of ``input_file`` is parsed as a JSON object. Its
    ``"question"`` value is stored under ``"prompt"`` and its ``"answer"``
    value under ``"completion"``. The resulting list is written to
    ``output_file`` as UTF-8 JSON, indented by 4 spaces, with non-ASCII
    characters kept unescaped.

    Args:
        input_file: Path of the .jsonl file to read.
        output_file: Path of the .json file to write; an existing file is
            overwritten.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no "question" or no "answer" key.
    """
    transformed_data = []
    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank or whitespace-only lines are not records; json.loads
            # would raise on them, so skip them instead of aborting the file.
            if not line.strip():
                continue
            entry = json.loads(line)
            transformed_data.append({
                "prompt": entry["question"],
                "completion": entry["answer"],
            })

    # The output is opened only after the whole input has been parsed, so a
    # malformed input never leaves a truncated output file behind.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(transformed_data, f, ensure_ascii=False, indent=4)
|
|
|
|
|
|
# Convert every .jsonl file found under directory_path (searched recursively)
# into a sibling .json file with the same base name.
if not os.path.isdir(directory_path):
    # os.walk silently yields nothing for a missing directory; fail loudly so
    # a wrong path is not mistaken for "nothing to convert".
    raise FileNotFoundError(
        "Input directory not found or not a directory: " + directory_path
    )

# A single os.walk pass already visits the top-level directory and all of its
# subdirectories, so each file is converted exactly once.
for root, dirs, files in os.walk(directory_path):
    for filename in files:
        # Only JSON Lines inputs are converted; everything else is ignored.
        if not filename.endswith('.jsonl'):
            continue
        # Build the full path of the input file.
        file_path = os.path.join(root, filename)
        # Swap only the trailing extension; str.replace would also rewrite
        # any '.jsonl' that happens to occur in a directory name.
        output_file = file_path[:-len('.jsonl')] + '.json'
        convert_to_desired_format(file_path, output_file)
|
|
|