from langflow.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data, retrieve_file_paths
from langflow.custom import Component
from langflow.io import BoolInput, IntInput, MessageTextInput
from langflow.schema import Data
from langflow.template import Output


class DirectoryComponent(Component):
    """Component that loads the files found under a directory.

    The user-facing inputs select the directory, the file types to keep and
    how the files are discovered and loaded; the single output is produced
    by :meth:`load_directory`.
    """

    display_name = "Directory"
    description = "Recursively load files from a directory."
    icon = "folder"
    name = "Directory"

    inputs = [
        MessageTextInput(
            name="path",
            display_name="Path",
            info="Path to the directory to load files from.",
        ),
        MessageTextInput(
            name="types",
            display_name="Types",
            info="File types to load. Leave empty to load all default supported types.",
            is_list=True,
        ),
        IntInput(
            name="depth",
            display_name="Depth",
            info="Depth to search for files.",
            value=0,
        ),
        IntInput(
            name="max_concurrency",
            display_name="Max Concurrency",
            advanced=True,
            info="Maximum concurrency for loading files.",
            value=2,
        ),
        BoolInput(
            name="load_hidden",
            display_name="Load Hidden",
            advanced=True,
            info="If true, hidden files will be loaded.",
        ),
        BoolInput(
            name="recursive",
            display_name="Recursive",
            advanced=True,
            info="If true, the search will be recursive.",
        ),
        BoolInput(
            name="silent_errors",
            display_name="Silent Errors",
            advanced=True,
            info="If true, errors will not raise an exception.",
        ),
        BoolInput(
            name="use_multithreading",
            display_name="Use Multithreading",
            advanced=True,
            info="If true, multithreading will be used.",
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="load_directory"),
    ]

    def load_directory(self) -> list[Data]:
        """Load the files under ``self.path`` and return the results.

        The path is resolved with ``self.resolve_path``, candidate files are
        collected with ``retrieve_file_paths`` and then restricted to those
        whose name ends with one of the selected types. Files are loaded
        with ``parallel_load_data`` when ``use_multithreading`` is set, and
        one by one with ``parse_text_file_to_data`` otherwise.

        Returns:
            The truthy load results. The same list is also stored on
            ``self.status``.
        """
        # self.types is already a list due to is_list=True. Blank entries are
        # dropped before use: ``str.endswith("")`` is always true, so a stray
        # empty string would otherwise make the suffix filter accept every
        # file. With nothing usable left, fall back to the default types.
        requested_types = [file_type for file_type in (self.types or []) if file_type]
        types = requested_types or TEXT_FILE_TYPES

        resolved_path = self.resolve_path(self.path)
        file_paths = retrieve_file_paths(
            resolved_path,
            load_hidden=self.load_hidden,
            recursive=self.recursive,
            depth=self.depth,
            types=types,
        )

        if types:
            # ``str.endswith`` accepts a tuple of suffixes; build it once
            # instead of looping over the types for every path.
            suffixes = tuple(types)
            file_paths = [fp for fp in file_paths if fp.endswith(suffixes)]

        silent_errors = self.silent_errors
        if self.use_multithreading:
            loaded_data = parallel_load_data(
                file_paths,
                silent_errors=silent_errors,
                max_concurrency=self.max_concurrency,
            )
        else:
            loaded_data = [
                parse_text_file_to_data(file_path, silent_errors=silent_errors) for file_path in file_paths
            ]

        # Drop falsy results (presumably files skipped when errors are
        # silenced; confirm against the loader helpers).
        loaded_data = list(filter(None, loaded_data))
        self.status = loaded_data
        return loaded_data  # type: ignore[return-value]