import requests
from langchain.tools import StructuredTool
from loguru import logger
from pydantic import BaseModel, Field

from langflow.base.langchain_utilities.model import LCToolComponent
from langflow.field_typing import Tool
from langflow.inputs import SecretStrInput, StrInput
from langflow.schema import Data


class NotionPageContent(LCToolComponent):
    """Langflow component that fetches a Notion page's blocks and renders them as plain text.

    The component can be run directly (``run_model``) or exposed to an agent as a
    LangChain ``StructuredTool`` (``build_tool``). Both paths go through
    ``_retrieve_page_content``, which reports failures as a string starting with
    ``"Error:"`` instead of raising.
    """

    display_name = "Page Content Viewer "
    description = "Retrieve the content of a Notion page as plain text."
    documentation = "https://docs.langflow.org/integrations/notion/page-content-viewer"
    icon = "NotionDirectoryLoader"

    inputs = [
        StrInput(
            name="page_id",
            display_name="Page ID",
            info="The ID of the Notion page to retrieve.",
        ),
        SecretStrInput(
            name="notion_secret",
            display_name="Notion Secret",
            info="The Notion integration token.",
            required=True,
        ),
    ]

    class NotionPageContentSchema(BaseModel):
        """Argument schema for the ``notion_page_content`` tool."""

        page_id: str = Field(..., description="The ID of the Notion page to retrieve.")

    def run_model(self) -> Data:
        """Fetch the page configured in ``self.page_id`` and wrap the outcome in a ``Data``.

        Returns:
            ``Data`` carrying only ``text`` when retrieval failed, or ``text`` plus
            ``data={"content": ...}`` when it succeeded.
        """
        result = self._retrieve_page_content(self.page_id)
        # _retrieve_page_content signals failure with an "Error:"-prefixed string.
        if isinstance(result, str) and result.startswith("Error:"):
            return Data(text=result)
        return Data(text=result, data={"content": result})

    def build_tool(self) -> Tool:
        """Expose page retrieval as a LangChain structured tool taking a ``page_id``."""
        return StructuredTool.from_function(
            name="notion_page_content",
            description="Retrieve the content of a Notion page as plain text.",
            func=self._retrieve_page_content,
            args_schema=self.NotionPageContentSchema,
        )

    def _retrieve_page_content(self, page_id: str) -> str:
        """Download every child block of ``page_id`` and return it as plain text.

        The block-children endpoint is paginated; this follows ``next_cursor``
        while the response reports ``has_more`` so that pages longer than one
        batch of 100 blocks are not truncated.

        Args:
            page_id: ID of the Notion page (or block) whose children are fetched.

        Returns:
            The rendered page text, or a message starting with ``"Error:"`` if the
            request or the parsing failed. This method does not raise.
        """
        blocks_url = f"https://api.notion.com/v1/blocks/{page_id}/children"
        headers = {
            "Authorization": f"Bearer {self.notion_secret}",
            "Notion-Version": "2022-06-28",
        }
        params = {"page_size": 100}
        blocks: list = []
        try:
            while True:
                blocks_response = requests.get(blocks_url, headers=headers, params=params, timeout=10)
                blocks_response.raise_for_status()
                blocks_data = blocks_response.json()
                blocks.extend(blocks_data.get("results", []))

                next_cursor = blocks_data.get("next_cursor")
                # Stop on the last batch; also stop if the API claims more data
                # but gives no cursor, which would otherwise loop forever.
                if not (blocks_data.get("has_more") and next_cursor):
                    break
                params["start_cursor"] = next_cursor
            return self.parse_blocks(blocks)
        except requests.exceptions.RequestException as e:
            error_message = f"Error: Failed to retrieve Notion page content. {e}"
            if e.response is not None:
                error_message += f" Status code: {e.response.status_code}, Response: {e.response.text}"
            return error_message
        except Exception as e:  # noqa: BLE001
            logger.opt(exception=True).debug("Error retrieving Notion page content")
            return f"Error: An unexpected error occurred while retrieving Notion page content. {e}"

    def parse_blocks(self, blocks: list) -> str:
        """Render a list of Notion block objects as plain text.

        Text-bearing blocks contribute their rich text followed by a separator:
        a blank line after paragraphs, headings, quotes and code, a single
        newline after list items and to-dos. Images become ``[Image: <url>]``,
        dividers become ``---``, and any other block type is skipped.

        Args:
            blocks: Block dictionaries as found in the API's ``results`` array.

        Returns:
            The concatenated text with leading and trailing whitespace stripped.
        """
        separators = {
            "paragraph": "\n\n",
            "heading_1": "\n\n",
            "heading_2": "\n\n",
            "heading_3": "\n\n",
            "quote": "\n\n",
            "code": "\n\n",
            "bulleted_list_item": "\n",
            "numbered_list_item": "\n",
            "to_do": "\n",
        }
        parts: list[str] = []
        for block in blocks:
            block_type = block.get("type")
            if block_type == "divider":
                parts.append("---\n\n")
            elif block_type == "image":
                image = block.get("image") or {}
                # Externally linked images live under "external", Notion-hosted
                # uploads under "file"; both carry the address in "url".
                source = image.get("external") or image.get("file") or {}
                parts.append(f"[Image: {source.get('url', 'No URL')}]\n\n")
            elif block_type in separators:
                # Tolerate a block whose type-specific payload is missing or null.
                payload = block.get(block_type) or {}
                parts.append(self.parse_rich_text(payload.get("rich_text") or []) + separators[block_type])
        return "".join(parts).strip()

    def parse_rich_text(self, rich_text: list) -> str:
        """Concatenate the ``plain_text`` of each rich-text segment, skipping absent values."""
        return "".join(segment.get("plain_text") or "" for segment in rich_text)

    def __call__(self, *args, **kwargs):
        """Allow the component instance to be called like ``_retrieve_page_content``."""
        return self._retrieve_page_content(*args, **kwargs)