from langflow.custom import Component
from langflow.io import (
    DataInput,
    IntInput,
    Output,
    SecretStrInput,
    StrInput,
)
from langflow.schema import Data


class FirecrawlScrapeApi(Component):
    """Langflow component that scrapes a single URL via the Firecrawl API.

    The component collects an API key, a target URL and optional scrape /
    extractor options, calls ``FirecrawlApp.scrape_url`` and wraps whatever
    the client returns in a ``Data`` object.
    """

    display_name: str = "FirecrawlScrapeApi"
    description: str = "Firecrawl Scrape API."
    name = "FirecrawlScrapeApi"

    output_types: list[str] = ["Document"]
    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/scrape"

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="API Key",
            required=True,
            password=True,
            info="The API key to use Firecrawl API.",
        ),
        StrInput(
            name="url",
            display_name="URL",
            required=True,
            info="The URL to scrape.",
        ),
        IntInput(
            name="timeout",
            display_name="Timeout",
            info="Timeout in milliseconds for the request.",
        ),
        DataInput(
            name="scrapeOptions",
            display_name="Scrape Options",
            info="The page options to send with the request.",
        ),
        DataInput(  # https://docs.firecrawl.dev/features/extract
            name="extractorOptions",
            display_name="Extractor Options",
            info="The extractor options to send with the request.",
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="crawl"),
    ]

    @staticmethod
    def _options_to_dict(options) -> dict:
        """Return a shallow copy of the dict carried by an options input.

        Accepts a ``Data``-like object (anything exposing ``.data``), a plain
        dict, or an empty/unset value. A copy is returned so callers can add
        keys without mutating the object supplied by an upstream component.
        """
        if not options:
            return {}
        raw = options if isinstance(options, dict) else options.data
        return dict(raw or {})

    def crawl(self) -> Data:
        """Scrape ``self.url`` with Firecrawl and return the result as ``Data``.

        Returns:
            A ``Data`` object whose ``data`` is the value returned by
            ``FirecrawlApp.scrape_url``.

        Raises:
            ImportError: If the ``firecrawl-py`` package is not installed.
        """
        # Imported lazily so the component can be loaded without the optional
        # firecrawl dependency; the error only surfaces when it is actually run.
        try:
            from firecrawl.firecrawl import FirecrawlApp
        except ImportError as e:
            msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
            raise ImportError(msg) from e

        params = self._options_to_dict(self.scrapeOptions)

        extractor_options_dict = self._options_to_dict(self.extractorOptions)
        if extractor_options_dict:
            params["extract"] = extractor_options_dict

        # Forward the declared timeout input (previously ignored). An explicit
        # "timeout" key in the scrape options takes precedence.
        if self.timeout:
            params.setdefault("timeout", self.timeout)

        app = FirecrawlApp(api_key=self.api_key)
        results = app.scrape_url(self.url, params=params)
        return Data(data=results)