This commit is contained in:
zealot52099 2024-03-18 10:46:09 +08:00
parent 74db6d9893
commit 98ecdda78d

View File

@@ -63,8 +63,8 @@ def split_document(data_path, chunk_size=1000, chunk_overlap=100):
# logger.info(f"splits type {type(splits[0])}") # logger.info(f"splits type {type(splits[0])}")
# logger.info(f'splits size {len(splits)}') # logger.info(f'splits size {len(splits)}')
split_docs += splits split_docs += splits
elif file.endswith('.txt'): elif data_path.endswith('.txt'):
file_path = os.path.join(root, file) file_path = os.path.join(root, data_path)
# logger.info(f'splitting file {file_path}') # logger.info(f'splitting file {file_path}')
text_loader = TextLoader(file_path, encoding='utf-8') text_loader = TextLoader(file_path, encoding='utf-8')
text = text_loader.load() text = text_loader.load()