Tai Truong
fix readme
d202ada
from typing import Any
from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter
from langflow.base.textsplitters.model import LCTextSplitterComponent
from langflow.inputs.inputs import DataInput, IntInput, MessageTextInput
from langflow.utils.util import unescape_string
class RecursiveCharacterTextSplitterComponent(LCTextSplitterComponent):
    """Component that builds a ``RecursiveCharacterTextSplitter``.

    The splitter is configured from three user-facing inputs (chunk size,
    chunk overlap and an optional list of separators); a fourth input
    carries the data to be split and is exposed via ``get_data_input``.
    """

    display_name: str = "Recursive Character Text Splitter"
    description: str = "Split text trying to keep all related text together."
    documentation: str = "https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter"
    name = "RecursiveCharacterTextSplitter"
    icon = "LangChain"

    # Declarative description of the fields this component exposes.
    inputs = [
        IntInput(
            name="chunk_size",
            display_name="Chunk Size",
            info="The maximum length of each chunk.",
            value=1000,
        ),
        IntInput(
            name="chunk_overlap",
            display_name="Chunk Overlap",
            info="The amount of overlap between chunks.",
            value=200,
        ),
        DataInput(
            name="data_input",
            display_name="Input",
            info="The texts to split.",
            input_types=["Document", "Data"],
        ),
        MessageTextInput(
            name="separators",
            display_name="Separators",
            info='The characters to split on.\nIf left empty defaults to ["\\n\\n", "\\n", " ", ""].',
            is_list=True,
        ),
    ]

    def get_data_input(self) -> Any:
        """Return the value of the ``data_input`` field unchanged."""
        return self.data_input

    def build_text_splitter(self) -> TextSplitter:
        """Create the splitter from the configured chunk settings and separators.

        An empty or missing ``separators`` value is forwarded as ``None``.
        Otherwise every entry is passed through ``unescape_string`` first, so
        that separators entered with escaped characters are unescaped before
        being handed to the splitter.
        """
        separators: list[str] | None = (
            [unescape_string(raw) for raw in self.separators]
            if self.separators
            else None
        )
        return RecursiveCharacterTextSplitter(
            separators=separators,
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
        )