class LangChainChunker:
    """Split a block of text into chunks using LangChain's character splitter."""

    def __init__(self, text: str):
        """Store the text that `chunker` will split."""
        self.text = text

    def chunker(self, size: int = 1000):
        """Split the stored text on spaces into chunks.

        Args:
            size: Value passed to the splitter as ``chunk_size``.

        Returns:
            Whatever ``CharacterTextSplitter.split_text`` returns for the
            stored text.
        """
        # Imported lazily so the class can be defined without langchain installed.
        from langchain.text_splitter import CharacterTextSplitter

        # TODO: attach the duration of the video to the chunk,
        # i.e. return [[chunk, duration]].

        text_splitter = CharacterTextSplitter(
            separator=" ",
            chunk_size=size,
            # The splitter's overlap is passed as a whole number; no overlap
            # between consecutive chunks is requested here.
            chunk_overlap=0,
        )

        return text_splitter.split_text(self.text)

    def __sizeof__(self) -> int:
        """Return the number of characters in the stored text."""
        return len(self.text)


def getSubsText(video_id="", getGenerated=False):
    """Return the transcript text of a YouTube video as a single line.

    The last transcript yielded by ``list_transcripts`` is the one used.
    Its formatted text is returned with every newline replaced by a space.

    Args:
        video_id: Identifier passed to ``YouTubeTranscriptApi.list_transcripts``.
        getGenerated: Currently has no effect (not implemented yet).

    Returns:
        The formatted transcript text with newlines replaced by spaces. When
        no transcript is listed, an empty string is handed to the formatter.
    """
    from youtube_transcript_api import YouTubeTranscriptApi as ytapi
    from youtube_transcript_api.formatters import TextFormatter

    transcripts = ytapi.list_transcripts(video_id)
    if getGenerated:
        # TODO: implement getGenerated
        pass

    # Only the last listed transcript is ever used, so locate it first and
    # fetch once instead of downloading every transcript along the way.
    chosen = None
    for chosen in transcripts:
        pass

    data = chosen.fetch() if chosen is not None else ""
    return TextFormatter().format_transcript(data).replace("\n", " ")