Spaces:
Running
Running
from typing import Any | |
from langchain_text_splitters import Language, RecursiveCharacterTextSplitter, TextSplitter | |
from langflow.base.textsplitters.model import LCTextSplitterComponent | |
from langflow.inputs import DataInput, DropdownInput, IntInput | |
class LanguageRecursiveTextSplitterComponent(LCTextSplitterComponent): | |
display_name: str = "Language Recursive Text Splitter" | |
description: str = "Split text into chunks of a specified length based on language." | |
documentation: str = "https://docs.langflow.org/components/text-splitters#languagerecursivetextsplitter" | |
name = "LanguageRecursiveTextSplitter" | |
icon = "LangChain" | |
inputs = [ | |
IntInput( | |
name="chunk_size", | |
display_name="Chunk Size", | |
info="The maximum length of each chunk.", | |
value=1000, | |
), | |
IntInput( | |
name="chunk_overlap", | |
display_name="Chunk Overlap", | |
info="The amount of overlap between chunks.", | |
value=200, | |
), | |
DataInput( | |
name="data_input", | |
display_name="Input", | |
info="The texts to split.", | |
input_types=["Document", "Data"], | |
), | |
DropdownInput( | |
name="code_language", display_name="Code Language", options=[x.value for x in Language], value="python" | |
), | |
] | |
def get_data_input(self) -> Any: | |
return self.data_input | |
def build_text_splitter(self) -> TextSplitter: | |
return RecursiveCharacterTextSplitter.from_language( | |
language=Language(self.code_language), | |
chunk_size=self.chunk_size, | |
chunk_overlap=self.chunk_overlap, | |
) | |