Spaces:
Running
Running
from langflow.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data, retrieve_file_paths | |
from langflow.custom import Component | |
from langflow.io import BoolInput, IntInput, MessageTextInput | |
from langflow.schema import Data | |
from langflow.template import Output | |
class DirectoryComponent(Component):
    """Component that loads the files found in a directory as ``Data`` items.

    The directory path, the accepted file types and the traversal/loading
    options are exposed as component inputs; the single ``data`` output is
    produced by :meth:`load_directory`.
    """

    display_name = "Directory"
    description = "Recursively load files from a directory."
    icon = "folder"
    name = "Directory"

    inputs = [
        MessageTextInput(
            name="path",
            display_name="Path",
            info="Path to the directory to load files from.",
        ),
        MessageTextInput(
            name="types",
            display_name="Types",
            info="File types to load. Leave empty to load all default supported types.",
            is_list=True,
        ),
        IntInput(
            name="depth",
            display_name="Depth",
            info="Depth to search for files.",
            value=0,
        ),
        IntInput(
            name="max_concurrency",
            display_name="Max Concurrency",
            advanced=True,
            info="Maximum concurrency for loading files.",
            value=2,
        ),
        BoolInput(
            name="load_hidden",
            display_name="Load Hidden",
            advanced=True,
            info="If true, hidden files will be loaded.",
        ),
        BoolInput(
            name="recursive",
            display_name="Recursive",
            advanced=True,
            info="If true, the search will be recursive.",
        ),
        BoolInput(
            name="silent_errors",
            display_name="Silent Errors",
            advanced=True,
            info="If true, errors will not raise an exception.",
        ),
        BoolInput(
            name="use_multithreading",
            display_name="Use Multithreading",
            advanced=True,
            info="If true, multithreading will be used.",
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="load_directory"),
    ]

    def load_directory(self) -> list[Data]:
        """Load every matching file under the configured directory.

        The configured path is passed through ``self.resolve_path``, candidate
        files are collected with ``retrieve_file_paths`` and then restricted to
        names ending in one of the requested types. Files are loaded either
        with ``parallel_load_data`` (when ``use_multithreading`` is set) or one
        by one with ``parse_text_file_to_data``. Falsy results are discarded.

        Returns:
            The loaded items; the same list is also stored in ``self.status``.
        """
        silent_errors = self.silent_errors

        # ``self.types`` is already a list because the input is declared with
        # ``is_list=True``. Blank entries are discarded before deciding whether
        # to fall back to the defaults: an empty string is a suffix of every
        # file name, so a single stray "" would otherwise disable the filter.
        requested_types = [t for t in (self.types or []) if str(t).strip()]
        types = requested_types or TEXT_FILE_TYPES

        resolved_path = self.resolve_path(self.path)
        file_paths = retrieve_file_paths(
            resolved_path,
            load_hidden=self.load_hidden,
            recursive=self.recursive,
            depth=self.depth,
            types=types,
        )

        if types:
            # str.endswith accepts a tuple of suffixes, so one call per path
            # replaces the per-extension loop.
            suffixes = tuple(types)
            file_paths = [fp for fp in file_paths if fp.endswith(suffixes)]

        if self.use_multithreading:
            loaded_data = parallel_load_data(
                file_paths,
                silent_errors=silent_errors,
                max_concurrency=self.max_concurrency,
            )
        else:
            loaded_data = [
                parse_text_file_to_data(file_path, silent_errors=silent_errors)
                for file_path in file_paths
            ]

        # NOTE(review): loaders presumably return None for files that failed
        # under silent_errors — confirm; either way, falsy results are dropped.
        loaded_data = list(filter(None, loaded_data))
        self.status = loaded_data
        return loaded_data  # type: ignore[return-value]