Tai Truong
fix readme
d202ada
from typing import Any
from langchain_text_splitters import CharacterTextSplitter, TextSplitter
from langflow.base.textsplitters.model import LCTextSplitterComponent
from langflow.inputs import DataInput, IntInput, MessageTextInput
from langflow.utils.util import unescape_string
class CharacterTextSplitterComponent(LCTextSplitterComponent):
    """Component that builds a LangChain ``CharacterTextSplitter``.

    The component exposes four inputs (chunk size, chunk overlap, the data
    to split, and the separator) and turns them into a configured splitter
    in :meth:`build_text_splitter`.
    """

    # Metadata describing the component.
    display_name = "CharacterTextSplitter"
    description = "Split text by number of characters."
    documentation = "https://docs.langflow.org/components/text-splitters#charactertextsplitter"
    name = "CharacterTextSplitter"
    icon = "LangChain"

    # Input fields; each ``name`` is read back as an attribute on ``self``
    # by the methods below.
    inputs = [
        # Forwarded as ``chunk_size`` to the splitter.
        IntInput(
            name="chunk_size",
            display_name="Chunk Size",
            info="The maximum length of each chunk.",
            value=1000,
        ),
        # Forwarded as ``chunk_overlap`` to the splitter.
        IntInput(
            name="chunk_overlap",
            display_name="Chunk Overlap",
            info="The amount of overlap between chunks.",
            value=200,
        ),
        # The material to split; returned unchanged by ``get_data_input``.
        DataInput(
            name="data_input",
            display_name="Input",
            info="The texts to split.",
            input_types=["Document", "Data"],
        ),
        # Optional separator; an empty value falls back to "\n\n".
        MessageTextInput(
            name="separator",
            display_name="Separator",
            info='The characters to split on.\nIf left empty defaults to "\\n\\n".',
        ),
    ]

    def get_data_input(self) -> Any:
        """Return the value currently held by the ``data_input`` field."""
        return self.data_input

    def build_text_splitter(self) -> TextSplitter:
        """Create the ``CharacterTextSplitter`` described by the inputs.

        Returns:
            A ``CharacterTextSplitter`` configured with this component's
            ``chunk_overlap``, ``chunk_size`` and resolved separator.
        """
        if self.separator:
            # A user-supplied separator is run through ``unescape_string``
            # before use (presumably so typed sequences such as "\\n" become
            # real control characters -- confirm against the helper).
            split_on = unescape_string(self.separator)
        else:
            # Nothing supplied: fall back to a blank line.
            split_on = "\n\n"

        splitter_kwargs = {
            "chunk_overlap": self.chunk_overlap,
            "chunk_size": self.chunk_size,
            "separator": split_on,
        }
        return CharacterTextSplitter(**splitter_kwargs)