update: scripts

This commit is contained in:
jujimeizuo 2024-01-23 08:15:01 +08:00
parent f47a360720
commit c7d35c2cc9
4 changed files with 59 additions and 13 deletions

45
scripts/check.py Normal file
View File

@ -0,0 +1,45 @@
import os
import json
def get_all_file_paths(folder_path, suffix=''):
    """Recursively collect the paths of files found under ``folder_path``.

    Args:
        folder_path: Directory whose tree is searched.
        suffix: Only paths ending with this string are returned. The default
            empty string matches every file.

    Returns:
        A list of paths, each formed by joining entry names onto
        ``folder_path``. Entries appear in ``os.listdir`` order, which is
        arbitrary.
    """
    path = []
    for file in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file)
        if os.path.isdir(file_path):
            # Forward the suffix; without it the recursive call falls back to
            # the default '' and every file in a subdirectory is returned.
            path.extend(get_all_file_paths(file_path, suffix))
        elif file_path.endswith(suffix):
            path.append(file_path)
    return path
def check(filepath):
    """Validate the structure of one JSON conversation file.

    The file must decode to a sequence of items. For every item:

    * each key must be ``'conversation'``;
    * the value under that key must have a non-zero ``len``;
    * each element of that value may only use the keys ``'system'``,
      ``'input'`` and ``'output'``, and every value under those keys must
      have a non-zero ``len``.

    Checking stops at the first item that breaks one of these rules.

    Args:
        filepath: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        ``'no error in file: <filepath>'`` when every item passes, otherwise
        ``'found error in file: <filepath> at conversation index: <idx>'``
        for the first failing item.
    """
    allowed_keys = ('system', 'input', 'output')

    # Read the file named by the argument. Reading a module-level ``path``
    # here would only work when the caller's loop variable has that name.
    with open(filepath, 'rt', encoding='utf-8') as file:
        data = json.load(file)

    for idx, item in enumerate(data):
        error = 'found error in file: ' + filepath + ' at conversation index: ' + str(idx)
        dict_item = dict(item)
        for conversation in dict_item:
            if conversation != 'conversation':
                return error
            turns = dict_item[conversation]
            try:
                if len(turns) == 0:
                    return error
            except TypeError:
                # Value has no length (e.g. a number or null).
                return error
            for in_out in turns:
                for key in in_out:
                    if key not in allowed_keys:
                        return error
                    try:
                        if len(in_out[key]) == 0:
                            return error
                    except TypeError:
                        # Value has no length, or in_out is not a mapping.
                        return error
    return 'no error in file: ' + filepath
if __name__ == '__main__':
    # Validate every .json file below the current working directory and
    # print one result line per file.
    for path in get_all_file_paths('.', suffix='.json'):
        print(check(filepath=path))

View File

@ -7,7 +7,8 @@ def save_merge_json(data_lis, file_path):
json.dump(data_lis, file, indent=4, ensure_ascii=False) json.dump(data_lis, file, indent=4, ensure_ascii=False)
def get_all_file_paths(folder_path): def get_all_file_paths(folder_path, suffix=''):
print(folder_path)
files = os.listdir(folder_path) files = os.listdir(folder_path)
path = [] path = []
for file in files: for file in files:
@ -15,26 +16,26 @@ def get_all_file_paths(folder_path):
if os.path.isdir(file_path): if os.path.isdir(file_path):
path.extend(get_all_file_paths(file_path)) path.extend(get_all_file_paths(file_path))
else: else:
if file_path.endswith(suffix):
path.append(file_path) path.append(file_path)
return path return path
if __name__ == '__main__': if __name__ == '__main__':
conversion_lis = [] conversion_lis = []
folder_path = '' # input folder_path = './' # input
merge_path = '' # input merge_path = 'merge.json' # input
paths = get_all_file_paths(folder_path=folder_path) paths = get_all_file_paths(folder_path=folder_path, suffix='.json')
for path in paths: for path in paths:
print(path) print(path)
with open(path, 'rt', encoding='utf-8') as lines: with open(path, 'rt', encoding='utf-8') as lines:
datas = []
for line in lines: for line in lines:
# 移除行尾的换行符 datas.append(line)
line.rstrip('\n')
# 解析JSON
try: try:
data = json.loads(line) datas = json.loads(''.join(datas))
conversion_lis.append(data) conversion_lis.extend(datas)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
print(f"Error decoding JSON: {e}") print(f"Error decoding JSON: {e}")
save_merge_json(data_lis=conversion_lis, file_path=merge_path) save_merge_json(data_lis=conversion_lis, file_path=merge_path)