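# Spaces: Running
# (The line above appears to be page-header residue from the site this file was copied from; it is not part of the module.)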
from urllib.parse import quote

import requests
from langchain.tools import StructuredTool
from loguru import logger
from pydantic import BaseModel, Field

from langflow.base.langchain_utilities.model import LCToolComponent
from langflow.field_typing import Tool
from langflow.inputs import SecretStrInput, StrInput
from langflow.schema import Data
class NotionPageContent(LCToolComponent):
    """Tool component that returns the blocks of a Notion page as plain text.

    The component can be run directly (``run_model``), exposed as a LangChain
    ``StructuredTool`` (``build_tool``), or called like a function
    (``__call__``). All three paths go through ``_retrieve_page_content``,
    which reports failures as a string starting with ``"Error:"`` instead of
    raising.
    """

    display_name = "Page Content Viewer "
    description = "Retrieve the content of a Notion page as plain text."
    documentation = "https://docs.langflow.org/integrations/notion/page-content-viewer"
    icon = "NotionDirectoryLoader"

    inputs = [
        StrInput(
            name="page_id",
            display_name="Page ID",
            info="The ID of the Notion page to retrieve.",
        ),
        SecretStrInput(
            name="notion_secret",
            display_name="Notion Secret",
            info="The Notion integration token.",
            required=True,
        ),
    ]

    class NotionPageContentSchema(BaseModel):
        """Argument schema used when the component is exposed as a tool."""

        page_id: str = Field(..., description="The ID of the Notion page to retrieve.")

    def run_model(self) -> Data:
        """Fetch the page configured in ``self.page_id`` and wrap it in ``Data``.

        Returns:
            ``Data`` whose ``text`` is the page content. On success the content
            is also stored under ``data["content"]``; on failure only the error
            message is returned as ``text``.
        """
        result = self._retrieve_page_content(self.page_id)
        # Failures are signalled in-band by the "Error:" prefix (see
        # _retrieve_page_content), so that prefix is the only way to tell them apart.
        if isinstance(result, str) and result.startswith("Error:"):
            return Data(text=result)
        return Data(text=result, data={"content": result})

    def build_tool(self) -> Tool:
        """Expose page retrieval as a ``StructuredTool`` named ``notion_page_content``."""
        return StructuredTool.from_function(
            name="notion_page_content",
            description="Retrieve the content of a Notion page as plain text.",
            func=self._retrieve_page_content,
            args_schema=self.NotionPageContentSchema,
        )

    def _retrieve_page_content(self, page_id: str) -> str:
        """Download every top-level block of ``page_id`` and render it as text.

        The endpoint is paginated; this method keeps requesting pages while the
        response reports ``has_more`` so that pages with more than 100 blocks
        are not truncated. Children of nested blocks are not fetched.

        Args:
            page_id: ID of the Notion page (block) whose children are listed.

        Returns:
            The plain-text content, or a message starting with ``"Error:"`` if
            the request or the parsing failed. This method does not raise.
        """
        # The ID may come from a tool caller, so percent-encode it: characters
        # such as "/", "?" or "#" must not be able to change the request target.
        safe_page_id = quote(str(page_id), safe="")
        blocks_url = f"https://api.notion.com/v1/blocks/{safe_page_id}/children"
        headers = {
            "Authorization": f"Bearer {self.notion_secret}",
            "Notion-Version": "2022-06-28",
        }
        params = {"page_size": 100}
        blocks = []
        try:
            while True:
                blocks_response = requests.get(blocks_url, headers=headers, params=params, timeout=10)
                blocks_response.raise_for_status()
                blocks_data = blocks_response.json()
                blocks.extend(blocks_data.get("results", []))

                next_cursor = blocks_data.get("next_cursor")
                # Stop on the last page; also stop if the cursor is missing so a
                # malformed response cannot make us re-request the same page forever.
                if not blocks_data.get("has_more") or not next_cursor:
                    break
                params["start_cursor"] = next_cursor
            return self.parse_blocks(blocks)
        except requests.exceptions.RequestException as e:
            error_message = f"Error: Failed to retrieve Notion page content. {e}"
            if e.response is not None:
                error_message += f" Status code: {e.response.status_code}, Response: {e.response.text}"
            return error_message
        except Exception as e:  # noqa: BLE001
            # Anything else (bad JSON, unexpected block shape, ...) is reported
            # to the caller as text; the traceback goes to the debug log.
            logger.opt(exception=True).debug("Error retrieving Notion page content")
            return f"Error: An unexpected error occurred while retrieving Notion page content. {e}"

    def parse_blocks(self, blocks: list) -> str:
        """Render a list of Notion block objects as plain text.

        Paragraphs, headings, quotes and code blocks are followed by a blank
        line; list items and to-dos by a single newline; images become
        ``[Image: <url>]`` and dividers ``---``. Other block types are skipped.

        Args:
            blocks: Block dictionaries, each with a ``"type"`` key and a payload
                stored under the key named by that type.

        Returns:
            The concatenated text with leading/trailing whitespace stripped.
        """
        parts = []
        for block in blocks:
            block_type = block.get("type")
            if block_type in {"paragraph", "heading_1", "heading_2", "heading_3", "quote", "code"}:
                parts.append(self.parse_rich_text(block[block_type].get("rich_text", [])) + "\n\n")
            elif block_type in {"bulleted_list_item", "numbered_list_item", "to_do"}:
                parts.append(self.parse_rich_text(block[block_type].get("rich_text", [])) + "\n")
            elif block_type == "image":
                image = block["image"]
                # The image payload lives under the key named by image["type"]
                # ("external", or "file" for Notion-hosted uploads), so look it
                # up by that name instead of assuming "external".
                image_source = image.get(image.get("type", "external")) or {}
                parts.append(f"[Image: {image_source.get('url', 'No URL')}]\n\n")
            elif block_type == "divider":
                parts.append("---\n\n")
        return "".join(parts).strip()

    def parse_rich_text(self, rich_text: list) -> str:
        """Join the ``plain_text`` of every rich-text segment, skipping segments without one."""
        return "".join(segment.get("plain_text", "") for segment in rich_text)

    def __call__(self, *args, **kwargs):
        """Call the component like a function; forwards to ``_retrieve_page_content``."""
        return self._retrieve_page_content(*args, **kwargs)