Spaces:
Runtime error
Runtime error
File size: 953 Bytes
c55e75f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
def chunk_document_to_dict(doc: str, doc_name: str, desired_chunk_size: int = 400, max_chunk_size: int = 500) -> dict:
    """Split a document into line-aligned chunks keyed by ``"<doc_name>_<n>"``.

    Lines are accumulated (each re-terminated with ``"\\n"``) until the buffer
    reaches ``desired_chunk_size`` characters, at which point it is emitted as
    a chunk. No chunk is ever longer than ``max_chunk_size``; text that would
    exceed the cap is carried over into the following chunk instead of being
    discarded, so concatenating all chunk values reproduces the normalized
    document.

    Args:
        doc: Full text of the document.
        doc_name: Prefix used to build chunk identifiers.
        desired_chunk_size: Buffer length (in characters) that triggers a flush.
        max_chunk_size: Hard upper bound on the length of any single chunk.

    Returns:
        A dict mapping ``"<doc_name>_<n>"`` (n starting at 1) to chunk text,
        in document order.

    Raises:
        ValueError: If ``max_chunk_size`` is less than 1.
    """
    if max_chunk_size < 1:
        # A non-positive cap cannot make progress when splitting the buffer.
        raise ValueError("max_chunk_size must be a positive integer")

    chunks = {}
    chunk = ''
    chunk_number = 1
    for line in doc.splitlines():
        chunk += line + '\n'
        # Flush while the buffer is large enough. Anything past the cap stays
        # in the buffer so an over-long line is split rather than truncated.
        # The `chunk and` guard keeps the loop finite when
        # desired_chunk_size <= 0.
        while chunk and len(chunk) >= desired_chunk_size:
            chunks[f"{doc_name}_{chunk_number}"] = chunk[:max_chunk_size]
            chunk = chunk[max_chunk_size:]
            chunk_number += 1

    # Push whatever is left into the final chunk(s), still honoring the cap
    # (the leftover can exceed it when desired_chunk_size > max_chunk_size).
    while chunk:
        chunks[f"{doc_name}_{chunk_number}"] = chunk[:max_chunk_size]
        chunk = chunk[max_chunk_size:]
        chunk_number += 1
    return chunks
def chunk_documents_to_dict(docs: dict, desired_chunk_size: int = 400, max_chunk_size: int = 500):
    """Chunk every document in ``docs`` and merge the results into one dict.

    Args:
        docs: Mapping of document name to document text.
        desired_chunk_size: Forwarded to ``chunk_document_to_dict``.
        max_chunk_size: Forwarded to ``chunk_document_to_dict``.

    Returns:
        A single dict containing the chunks of all documents, in the order
        the documents are iterated. If two documents yield the same chunk id,
        the later one overwrites the earlier.
    """
    return {
        chunk_id: chunk_text
        for name, text in docs.items()
        for chunk_id, chunk_text in chunk_document_to_dict(
            text, name, desired_chunk_size, max_chunk_size
        ).items()
    }