# Tai Truong
# fix readme
# d202ada
from typing import Any
from langchain_text_splitters import Language, RecursiveCharacterTextSplitter, TextSplitter
from langflow.base.textsplitters.model import LCTextSplitterComponent
from langflow.inputs import DataInput, DropdownInput, IntInput
class LanguageRecursiveTextSplitterComponent(LCTextSplitterComponent):
    """Text-splitter component that chunks input using language-specific rules.

    The component exposes a chunk size, a chunk overlap, the data to split and
    a code-language selector, and turns them into a
    ``RecursiveCharacterTextSplitter`` via ``from_language``.
    """

    # Component metadata.
    display_name: str = "Language Recursive Text Splitter"
    description: str = "Split text into chunks of a specified length based on language."
    documentation: str = "https://docs.langflow.org/components/text-splitters#languagerecursivetextsplitter"
    name = "LanguageRecursiveTextSplitter"
    icon = "LangChain"

    # Declared inputs; the list order is kept exactly as originally defined.
    inputs = [
        IntInput(
            name="chunk_size",
            display_name="Chunk Size",
            info="The maximum length of each chunk.",
            value=1000,
        ),
        IntInput(
            name="chunk_overlap",
            display_name="Chunk Overlap",
            info="The amount of overlap between chunks.",
            value=200,
        ),
        DataInput(
            name="data_input",
            display_name="Input",
            info="The texts to split.",
            input_types=["Document", "Data"],
        ),
        DropdownInput(
            name="code_language",
            display_name="Code Language",
            # One option per member of the ``Language`` enum.
            options=[member.value for member in Language],
            value="python",
        ),
    ]

    def get_data_input(self) -> Any:
        """Return the value currently held by the ``data_input`` input."""
        return self.data_input

    def build_text_splitter(self) -> TextSplitter:
        """Create the splitter for the selected language.

        Returns:
            The object produced by ``RecursiveCharacterTextSplitter.from_language``
            for the chosen language, chunk size and chunk overlap.
        """
        # Convert the dropdown's string value back into the enum member.
        selected_language = Language(self.code_language)
        size_options = {
            "chunk_size": self.chunk_size,
            "chunk_overlap": self.chunk_overlap,
        }
        return RecursiveCharacterTextSplitter.from_language(
            language=selected_language,
            **size_options,
        )