Tai Truong
fix readme
d202ada
from langflow.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data, retrieve_file_paths
from langflow.custom import Component
from langflow.io import BoolInput, IntInput, MessageTextInput
from langflow.schema import Data
from langflow.template import Output
class DirectoryComponent(Component):
    """Component that loads the files found under a directory.

    The directory is scanned with ``retrieve_file_paths`` and every selected
    file is parsed with ``parse_text_file_to_data`` (or ``parallel_load_data``
    when multithreading is enabled). The result is exposed through the
    ``data`` output.
    """

    # Metadata describing the component.
    display_name = "Directory"
    description = "Recursively load files from a directory."
    icon = "folder"
    name = "Directory"

    # User-configurable inputs; each one is read back as ``self.<name>``
    # inside ``load_directory``.
    inputs = [
        MessageTextInput(
            name="path",
            display_name="Path",
            info="Path to the directory to load files from.",
        ),
        MessageTextInput(
            name="types",
            display_name="Types",
            info="File types to load. Leave empty to load all default supported types.",
            is_list=True,
        ),
        IntInput(
            name="depth",
            display_name="Depth",
            info="Depth to search for files.",
            value=0,
        ),
        IntInput(
            name="max_concurrency",
            display_name="Max Concurrency",
            advanced=True,
            info="Maximum concurrency for loading files.",
            value=2,
        ),
        BoolInput(
            name="load_hidden",
            display_name="Load Hidden",
            advanced=True,
            info="If true, hidden files will be loaded.",
        ),
        BoolInput(
            name="recursive",
            display_name="Recursive",
            advanced=True,
            info="If true, the search will be recursive.",
        ),
        BoolInput(
            name="silent_errors",
            display_name="Silent Errors",
            advanced=True,
            info="If true, errors will not raise an exception.",
        ),
        BoolInput(
            name="use_multithreading",
            display_name="Use Multithreading",
            advanced=True,
            info="If true, multithreading will be used.",
        ),
    ]

    # The single output is produced by ``load_directory``.
    outputs = [
        Output(display_name="Data", name="data", method="load_directory"),
    ]

    def load_directory(self) -> list[Data]:
        """Load the files under ``self.path`` and return the parsed results.

        Reads the component inputs (``path``, ``types``, ``depth``,
        ``max_concurrency``, ``load_hidden``, ``recursive``,
        ``silent_errors``, ``use_multithreading``), collects candidate paths,
        keeps only those ending with one of the requested types, and parses
        each remaining file.

        Returns:
            The non-empty parse results. The same list is also stored in
            ``self.status``.
        """
        # ``types`` is a list because the input is declared with is_list=True.
        # Blank entries are dropped before deciding whether to fall back to the
        # defaults: ``str.endswith("")`` is always true, so a list such as
        # ["", "txt"] would otherwise make the suffix filter accept every file.
        requested_types = [t for t in (self.types or []) if t and str(t).strip()]
        types = requested_types or TEXT_FILE_TYPES

        resolved_path = self.resolve_path(self.path)
        file_paths = retrieve_file_paths(
            resolved_path,
            load_hidden=self.load_hidden,
            recursive=self.recursive,
            depth=self.depth,
            types=types,
        )

        if types:
            # ``str.endswith`` accepts a tuple of suffixes, so one call replaces
            # the per-extension ``any(...)`` loop.
            suffixes = tuple(types)
            file_paths = [fp for fp in file_paths if fp.endswith(suffixes)]

        if self.use_multithreading:
            loaded_data = parallel_load_data(
                file_paths,
                silent_errors=self.silent_errors,
                max_concurrency=self.max_concurrency,
            )
        else:
            loaded_data = [
                parse_text_file_to_data(file_path, silent_errors=self.silent_errors) for file_path in file_paths
            ]

        # Drop falsy results (presumably files that failed to parse while
        # silent_errors was enabled -- confirm against the loader helpers).
        loaded_data = list(filter(None, loaded_data))
        self.status = loaded_data
        return loaded_data  # type: ignore[return-value]