from .IChunkGenerator import IChunkGenerator

import nltk
from nltk.tokenize import sent_tokenize

# sent_tokenize needs the Punkt sentence model; fetch it once if it is missing.
try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    nltk.download("punkt")


class ChunkGenerator(IChunkGenerator):
    def chunk_text(self, text: str, max_words: int = 100) -> list[str]:
        """Split text into chunks of roughly max_words words without breaking sentences."""
        sentences = sent_tokenize(text)
        chunks, chunk = [], []
        word_count = 0
        for sentence in sentences:
            word_count += len(sentence.split())
            chunk.append(sentence)
            # Flush the current chunk once it reaches the word budget.
            if word_count >= max_words:
                chunks.append(" ".join(chunk))
                chunk = []
                word_count = 0
        # Keep any trailing sentences that never filled a full chunk.
        if chunk:
            chunks.append(" ".join(chunk))
        return chunks
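

# A minimal usage sketch showing how sentences accumulate into word-budgeted
# chunks. The sample text and the small max_words value are illustrative only.
# Because of the relative import above, this file must run in package context
# (e.g. `python -m <package>.<module>`; the package layout is an assumption
# about the surrounding project).
if __name__ == "__main__":
    generator = ChunkGenerator()
    sample = (
        "NLTK first splits the text into sentences. "
        "Each chunk then accumulates whole sentences until it reaches the word budget. "
        "A small max_words value makes the grouping easy to see."
    )
    for i, piece in enumerate(generator.chunk_text(sample, max_words=10)):
        print(f"chunk {i}: {piece}")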