OliveSensorAPI/datasets/processed/Book_QA_process_Step_1.py

36 lines
1.2 KiB
Python

import os
import json
# 设置目录路径,这里假设你的 .jsonl 文件都在当前目录下的directory_path文件夹中
directory_path = '../初步清洗的QA数据'
def convert_to_desired_format(input_file, output_file):
with open(input_file, 'r', encoding='utf-8') as f:
data = [json.loads(line) for line in f]
transformed_data = []
for entry in data:
transformed_entry = {
"prompt": entry["question"],
"completion": entry["answer"]
}
transformed_data.append(transformed_entry)
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(transformed_data, f, ensure_ascii=False, indent=4)
# 遍历指定目录下的所有文件
for filename in os.listdir(directory_path):
for root, dirs, files in os.walk(directory_path):
# 遍历当前文件夹下的文件
for filename in files:
# 检查文件扩展名是否为.json
if filename.endswith('.jsonl'):
# 构建文件的完整路径
file_path = os.path.join(root, filename)
output_file = file_path.replace('.jsonl', '.json')
convert_to_desired_format(file_path, output_file)