Spaces:
Runtime error
Runtime error
Upload Chunk_Lib.py
Browse files
App_Function_Libraries/Chunk_Lib.py
CHANGED
|
@@ -476,22 +476,22 @@ def semantic_chunk_long_file(file_path, max_chunk_size=1000, overlap=100, unit='
|
|
| 476 |
#
|
| 477 |
# Embedding Chunking
|
| 478 |
|
| 479 |
-
def chunk_for_embedding(text: str, file_name: str,
|
| 480 |
options = chunk_options.copy()
|
| 481 |
if custom_chunk_options:
|
| 482 |
options.update(custom_chunk_options)
|
| 483 |
|
|
|
|
| 484 |
chunks = improved_chunking_process(text, options)
|
| 485 |
total_chunks = len(chunks)
|
|
|
|
| 486 |
|
| 487 |
chunked_text_with_headers = []
|
| 488 |
for i, chunk in enumerate(chunks, 1):
|
| 489 |
chunk_text = chunk['text']
|
| 490 |
chunk_position = determine_chunk_position(chunk['metadata']['relative_position'])
|
| 491 |
-
|
| 492 |
chunk_header = f"""
|
| 493 |
Original Document: {file_name}
|
| 494 |
-
Full Document Summary: {full_summary or "Full document summary not available."}
|
| 495 |
Chunk: {i} of {total_chunks}
|
| 496 |
Position: {chunk_position}
|
| 497 |
|
|
|
|
| 476 |
#
|
| 477 |
# Embedding Chunking
|
| 478 |
|
| 479 |
+
def chunk_for_embedding(text: str, file_name: str, custom_chunk_options: Dict[str, Any] = None) -> List[Dict[str, Any]]:
|
| 480 |
options = chunk_options.copy()
|
| 481 |
if custom_chunk_options:
|
| 482 |
options.update(custom_chunk_options)
|
| 483 |
|
| 484 |
+
logging.info(f"Chunking options: {options}")
|
| 485 |
chunks = improved_chunking_process(text, options)
|
| 486 |
total_chunks = len(chunks)
|
| 487 |
+
logging.info(f"Total chunks created: {total_chunks}")
|
| 488 |
|
| 489 |
chunked_text_with_headers = []
|
| 490 |
for i, chunk in enumerate(chunks, 1):
|
| 491 |
chunk_text = chunk['text']
|
| 492 |
chunk_position = determine_chunk_position(chunk['metadata']['relative_position'])
|
|
|
|
| 493 |
chunk_header = f"""
|
| 494 |
Original Document: {file_name}
|
|
|
|
| 495 |
Chunk: {i} of {total_chunks}
|
| 496 |
Position: {chunk_position}
|
| 497 |
|