Spaces:

White74195
/

AI_Agent_OWL_tester

Configuration error

App Files Files Community

AI_Agent_OWL_tester / owl /camel /toolkits /notion_toolkit.py

White74195

Upload 302 files

b9d9271 verified 4 months ago

raw

history blame contribute delete

9.51 kB

	# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
	import os
	from typing import Any, Callable, List, Optional, cast

	from camel.toolkits import FunctionTool
	from camel.toolkits.base import BaseToolkit


	def get_plain_text_from_rich_text(rich_text: List[dict]) -> str:
	    r"""Concatenates the plain text carried by rich text elements.

	    Args:
	        rich_text: A list of dictionaries representing rich text elements.
	            The value stored under each element's "plain_text" key is
	            used; an element without that key contributes an empty
	            string.

	    Returns:
	        str: The plain text of every element, concatenated in order with
	            no separator.
	    """
	    return "".join(item.get("plain_text", "") for item in rich_text)


	def get_media_source_text(block: dict) -> str:
	    r"""Builds a text description of a Notion media block.

	    Args:
	        block: A dictionary representing a Notion media block. The payload
	            is read from the key named by the block's "type" value.

	    Returns:
	        The source URL of the media, prefixed with the caption and a
	        colon when the block has a non-empty caption. When no URL can be
	        found, a placeholder naming the block type is used instead.
	    """
	    kind = block.get("type", "Unknown Type")
	    payload = block.get(kind, {})

	    # Try the known URL locations in order: externally hosted media,
	    # Notion-hosted files, then a URL stored directly on the payload.
	    url = payload.get("external", {}).get("url")
	    if not url:
	        url = payload.get("file", {}).get("url")
	    if not url:
	        url = payload.get(
	            "url", "[Missing case for media block types]: " + kind
	        )

	    caption_parts = payload.get("caption", [])
	    if not caption_parts:
	        return url

	    return f"{get_plain_text_from_rich_text(caption_parts)}: {url}"


	class NotionToolkit(BaseToolkit):
	    r"""A toolkit for retrieving information from the user's Notion pages.

	    Attributes:
	        notion_token (Optional[str]): The token used to authenticate
	            against the Notion APIs. Taken from the constructor argument,
	            falling back to the ``NOTION_TOKEN`` environment variable.
	        notion_client (Client): The ``notion_client.Client`` instance
	            through which every API call is made.
	    """

	    def __init__(
	        self,
	        notion_token: Optional[str] = None,
	    ) -> None:
	        r"""Initializes the NotionToolkit.

	        Args:
	            notion_token (Optional[str], optional): The token used to
	                interact with the Notion APIs. When omitted, the
	                ``NOTION_TOKEN`` environment variable is used instead.
	                (default: :obj:`None`)
	        """
	        # Imported lazily so the module can be imported without the
	        # optional `notion_client` dependency installed.
	        from notion_client import Client

	        self.notion_token = notion_token or os.environ.get("NOTION_TOKEN")
	        self.notion_client = Client(auth=self.notion_token)

	    def _collect_paginated(
	        self, endpoint: Callable[..., Any], **params: Any
	    ) -> List[dict]:
	        r"""Collects the results of every page of a paginated endpoint.

	        Args:
	            endpoint (Callable[..., Any]): A bound Notion client method
	                that accepts a ``start_cursor`` keyword argument and
	                returns a mapping with "results", "has_more" and
	                "next_cursor" keys.
	            **params (Any): Extra keyword arguments forwarded unchanged
	                to every call of ``endpoint``.

	        Returns:
	            List[dict]: The concatenated "results" of all pages.
	        """
	        results: List[dict] = []
	        cursor = None

	        while True:
	            response = cast(dict, endpoint(start_cursor=cursor, **params))
	            results.extend(response["results"])

	            cursor = response.get("next_cursor")
	            # Stop on the last page. Also stop when more pages are
	            # announced without a cursor: continuing would request the
	            # first page again and never terminate.
	            if not response.get("has_more") or cursor is None:
	                break

	        return results

	    def _extract_page_title(self, page: dict) -> Optional[str]:
	        r"""Extracts the title of a Notion page object.

	        Args:
	            page (dict): A dictionary representing a Notion page.

	        Returns:
	            Optional[str]: The page title, or :obj:`None` when the page
	                has no title text.
	        """
	        properties = page.get("properties") or {}

	        # Only standalone pages name their title property "title"; pages
	        # inside a database use the title column's own name. Look the
	        # property up by its type first and keep the literal key as a
	        # fallback.
	        title_property = next(
	            (
	                prop
	                for prop in properties.values()
	                if isinstance(prop, dict) and prop.get("type") == "title"
	            ),
	            None,
	        )
	        if title_property is None:
	            title_property = properties.get("title") or {}

	        # Join every fragment so titles containing formatting or mentions
	        # are not cut off after their first piece.
	        title = "".join(
	            fragment.get("plain_text")
	            or fragment.get("text", {}).get("content")
	            or ""
	            for fragment in title_property.get("title", [])
	        )
	        return title or None

	    def list_all_users(self) -> List[dict]:
	        r"""Lists all users via the Notion integration.

	        Returns:
	            List[dict]: A list of user objects with type, name, and
	                workspace. "type" and "name" are :obj:`None` when the
	                API omits them; "workspace" is an empty string when the
	                user has no workspace name.
	        """
	        users = self._collect_paginated(self.notion_client.users.list)

	        return [
	            {
	                # Read optional fields with `get` so one sparse user
	                # object does not abort the whole listing.
	                "type": user.get("type"),
	                "name": user.get("name"),
	                "workspace": (user.get(user.get("type")) or {}).get(
	                    "workspace_name"
	                )
	                or "",
	            }
	            for user in users
	        ]

	    def list_all_pages(self) -> List[dict]:
	        r"""Lists all pages in the Notion workspace.

	        Returns:
	            List[dict]: A list of page objects with title and id. The
	                title is :obj:`None` for pages without title text.
	        """
	        pages = self._collect_paginated(
	            self.notion_client.search,
	            filter={"property": "object", "value": "page"},
	        )

	        return [
	            {
	                "id": page.get("id"),
	                "title": self._extract_page_title(page),
	            }
	            for page in pages
	        ]

	    def get_notion_block_text_content(self, block_id: str) -> str:
	        r"""Retrieves the text content of a Notion block.

	        Args:
	            block_id (str): The ID of the Notion block to retrieve.

	        Returns:
	            str: The text of all sub blocks of the block (including
	                their own nested children), separated by spaces.
	        """
	        blocks = self._collect_paginated(
	            self.notion_client.blocks.children.list, block_id=block_id
	        )

	        return " ".join(
	            self.get_text_from_block(sub_block) for sub_block in blocks
	        )

	    def get_text_from_block(self, block: dict) -> str:
	        r"""Extracts plain text from a Notion block based on its type.

	        Args:
	            block (dict): A dictionary representing a Notion block.

	        Returns:
	            str: The text extracted from the block, followed by the text
	                of its children when the block has any.
	        """
	        block_type = block.get("type")
	        block_content = block.get(block_type) or {}
	        rich_text = block_content.get("rich_text")

	        if rich_text is not None:
	            # Check for presence rather than truthiness: an empty line
	            # has an empty rich text list and must yield an empty string
	            # instead of falling through to the per-type cases below.
	            text = get_plain_text_from_rich_text(rich_text)
	        elif block_type == "unsupported":
	            text = "[Unsupported block type]"
	        elif block_type == "bookmark":
	            text = block["bookmark"]["url"]
	        elif block_type == "child_database":
	            # Use other API endpoints for full database data
	            text = block["child_database"]["title"]
	        elif block_type == "child_page":
	            text = block["child_page"]["title"]
	        elif block_type in ("embed", "video", "file", "image", "pdf"):
	            text = get_media_source_text(block)
	        elif block_type == "equation":
	            text = block["equation"]["expression"]
	        elif block_type == "link_preview":
	            text = block["link_preview"]["url"]
	        elif block_type == "synced_block":
	            synced_from = block["synced_block"].get("synced_from")
	            if synced_from:
	                # `synced_from` stores the source ID under the key named
	                # by its own "type" value. Build the message on a single
	                # line so no stray newlines or indentation surround the
	                # ID.
	                text = (
	                    "This block is synced with a block with ID: "
	                    f"{synced_from[synced_from['type']]}"
	                )
	            else:
	                text = "Source sync block that another block is synced with."
	        elif block_type == "table":
	            # Fetch children for full table data
	            text = f"Table width: {block['table']['table_width']}"
	        elif block_type == "table_of_contents":
	            text = f"ToC color: {block['table_of_contents']['color']}"
	        elif block_type in ("breadcrumb", "column_list", "divider"):
	            text = "No text available"
	        else:
	            text = "[Needs case added]"

	        # Query children for blocks with children
	        if block.get("has_children"):
	            children_text = self.get_notion_block_text_content(block["id"])
	            if children_text:
	                # Separate parent and children with a space, matching how
	                # sibling blocks are joined, so words do not run together.
	                text = f"{text} {children_text}" if text else children_text

	        return text

	    def get_tools(self) -> List[FunctionTool]:
	        r"""Returns a list of FunctionTool objects representing the
	        functions in the toolkit.

	        Returns:
	            List[FunctionTool]: A list of FunctionTool objects
	                representing the functions in the toolkit.
	        """
	        return [
	            FunctionTool(self.list_all_pages),
	            FunctionTool(self.list_all_users),
	            FunctionTool(self.get_notion_block_text_content),
	        ]