update: scripts
This commit is contained in:
parent
f47a360720
commit
c7d35c2cc9
45
scripts/check.py
Normal file
45
scripts/check.py
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
def get_all_file_paths(folder_path, suffix=''):
    """Recursively collect paths of files under *folder_path*.

    Parameters:
        folder_path: directory to walk.
        suffix: keep only files whose path ends with this suffix
            (default '' matches every file).

    Returns:
        list of matching file path strings.
    """
    paths = []
    for entry in os.listdir(folder_path):
        entry_path = os.path.join(folder_path, entry)
        if os.path.isdir(entry_path):
            # Bug fix: propagate *suffix* into the recursion. The original
            # omitted it, so every file in a subdirectory was collected
            # regardless of its extension.
            paths.extend(get_all_file_paths(entry_path, suffix=suffix))
        elif entry_path.endswith(suffix):
            paths.append(entry_path)
    return paths
|
||||||
|
|
||||||
|
def check(filepath):
    """Validate the conversation-JSON file at *filepath*.

    Expected schema (inferred from the checks below): a JSON array of
    objects, each holding only the key 'conversation' mapped to a
    non-empty list of turn dicts whose keys are limited to
    'system' / 'input' / 'output', each with a non-empty value.

    Returns:
        A human-readable status string; schema problems are reported in
        the return value rather than raised. JSON decode / file errors
        still propagate to the caller.
    """
    def _error(idx):
        # All schema violations produce the same message shape.
        return 'found error in file: ' + filepath + ' at conversation index: ' + str(idx)

    # Bug fix: the original opened the global name `path` (leaked from the
    # __main__ loop) instead of the `filepath` argument, so calling check()
    # outside that loop raised NameError or read the wrong file.
    with open(filepath, 'rt', encoding='utf-8') as fh:
        data = json.load(fh)

    for idx, item in enumerate(data):
        dict_item = dict(item)
        for conversation in dict_item:
            if conversation != 'conversation':
                return _error(idx)
            # Narrowed from a bare `except:`: only a value without len()
            # (or a bad lookup) is a schema error; anything else is a bug
            # that should surface.
            try:
                if len(dict_item[conversation]) == 0:
                    return _error(idx)
            except (TypeError, KeyError):
                return _error(idx)
            for in_out in dict_item[conversation]:
                for key in in_out:
                    if key not in ('system', 'input', 'output'):
                        return _error(idx)
                    try:
                        if len(in_out[key]) == 0:
                            return _error(idx)
                    except (TypeError, KeyError):
                        return _error(idx)
    return 'no error in file: ' + filepath
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Scan the current directory tree and print the validation status of
    # every JSON file found.
    dir_path = '.'
    # NOTE: the loop variable is named `path` on purpose — check() as
    # originally written reads it as a global.
    for path in get_all_file_paths(dir_path, suffix='.json'):
        print(check(filepath=path))
|
@ -7,7 +7,8 @@ def save_merge_json(data_lis, file_path):
|
|||||||
json.dump(data_lis, file, indent=4, ensure_ascii=False)
|
json.dump(data_lis, file, indent=4, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
def get_all_file_paths(folder_path):
|
def get_all_file_paths(folder_path, suffix=''):
|
||||||
|
print(folder_path)
|
||||||
files = os.listdir(folder_path)
|
files = os.listdir(folder_path)
|
||||||
path = []
|
path = []
|
||||||
for file in files:
|
for file in files:
|
||||||
@ -15,26 +16,26 @@ def get_all_file_paths(folder_path):
|
|||||||
if os.path.isdir(file_path):
|
if os.path.isdir(file_path):
|
||||||
path.extend(get_all_file_paths(file_path))
|
path.extend(get_all_file_paths(file_path))
|
||||||
else:
|
else:
|
||||||
|
if file_path.endswith(suffix):
|
||||||
path.append(file_path)
|
path.append(file_path)
|
||||||
return path
|
return path
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
conversion_lis = []
|
conversion_lis = []
|
||||||
folder_path = '' # input
|
folder_path = './' # input
|
||||||
merge_path = '' # input
|
merge_path = 'merge.json' # input
|
||||||
paths = get_all_file_paths(folder_path=folder_path)
|
paths = get_all_file_paths(folder_path=folder_path, suffix='.json')
|
||||||
|
|
||||||
for path in paths:
|
for path in paths:
|
||||||
print(path)
|
print(path)
|
||||||
with open(path, 'rt', encoding='utf-8') as lines:
|
with open(path, 'rt', encoding='utf-8') as lines:
|
||||||
|
datas = []
|
||||||
for line in lines:
|
for line in lines:
|
||||||
# 移除行尾的换行符
|
datas.append(line)
|
||||||
line.rstrip('\n')
|
|
||||||
# 解析JSON
|
|
||||||
try:
|
try:
|
||||||
data = json.loads(line)
|
datas = json.loads(''.join(datas))
|
||||||
conversion_lis.append(data)
|
conversion_lis.extend(datas)
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
print(f"Error decoding JSON: {e}")
|
print(f"Error decoding JSON: {e}")
|
||||||
save_merge_json(data_lis=conversion_lis, file_path=merge_path)
|
save_merge_json(data_lis=conversion_lis, file_path=merge_path)
|
Loading…
Reference in New Issue
Block a user