Spaces:
Running
Running
import json | |
from typing import Any | |
from urllib.parse import urljoin | |
import httpx | |
from langchain_core.tools import StructuredTool, ToolException | |
from pydantic import BaseModel | |
from pydantic.v1 import Field | |
from langflow.base.langchain_utilities.model import LCToolComponent | |
from langflow.field_typing import Tool | |
from langflow.inputs import IntInput, MultilineInput, NestedDictInput, SecretStrInput, StrInput | |
from langflow.schema import Data | |
class GleanSearchAPISchema(BaseModel): | |
query: str = Field(..., description="The search query") | |
page_size: int = Field(10, description="Maximum number of results to return") | |
request_options: dict[str, Any] | None = Field(default_factory=dict, description="Request Options") | |
class GleanAPIWrapper(BaseModel): | |
"""Wrapper around Glean API.""" | |
glean_api_url: str | |
glean_access_token: str | |
act_as: str = "[email protected]" # TODO: Detect this | |
def _prepare_request( | |
self, | |
query: str, | |
page_size: int = 10, | |
request_options: dict[str, Any] | None = None, | |
) -> dict: | |
# Ensure there's a trailing slash | |
url = self.glean_api_url | |
if not url.endswith("/"): | |
url += "/" | |
return { | |
"url": urljoin(url, "search"), | |
"headers": { | |
"Authorization": f"Bearer {self.glean_access_token}", | |
"X-Scio-ActAs": self.act_as, | |
}, | |
"payload": { | |
"query": query, | |
"pageSize": page_size, | |
"requestOptions": request_options, | |
}, | |
} | |
def results(self, query: str, **kwargs: Any) -> list[dict[str, Any]]: | |
results = self._search_api_results(query, **kwargs) | |
if len(results) == 0: | |
msg = "No good Glean Search Result was found" | |
raise AssertionError(msg) | |
return results | |
def run(self, query: str, **kwargs: Any) -> list[dict[str, Any]]: | |
try: | |
results = self.results(query, **kwargs) | |
processed_results = [] | |
for result in results: | |
if "title" in result: | |
result["snippets"] = result.get("snippets", [{"snippet": {"text": result["title"]}}]) | |
if "text" not in result["snippets"][0]: | |
result["snippets"][0]["text"] = result["title"] | |
processed_results.append(result) | |
except Exception as e: | |
error_message = f"Error in Glean Search API: {e!s}" | |
raise ToolException(error_message) from e | |
return processed_results | |
def _search_api_results(self, query: str, **kwargs: Any) -> list[dict[str, Any]]: | |
request_details = self._prepare_request(query, **kwargs) | |
response = httpx.post( | |
request_details["url"], | |
json=request_details["payload"], | |
headers=request_details["headers"], | |
) | |
response.raise_for_status() | |
response_json = response.json() | |
return response_json.get("results", []) | |
def _result_as_string(result: dict) -> str: | |
return json.dumps(result, indent=4) | |
class GleanSearchAPIComponent(LCToolComponent): | |
display_name = "Glean Search API" | |
description = "Call Glean Search API" | |
name = "GleanAPI" | |
icon = "Glean" | |
inputs = [ | |
StrInput( | |
name="glean_api_url", | |
display_name="Glean API URL", | |
required=True, | |
), | |
SecretStrInput(name="glean_access_token", display_name="Glean Access Token", required=True), | |
MultilineInput(name="query", display_name="Query", required=True), | |
IntInput(name="page_size", display_name="Page Size", value=10), | |
NestedDictInput(name="request_options", display_name="Request Options", required=False), | |
] | |
def build_tool(self) -> Tool: | |
wrapper = self._build_wrapper( | |
glean_api_url=self.glean_api_url, | |
glean_access_token=self.glean_access_token, | |
) | |
tool = StructuredTool.from_function( | |
name="glean_search_api", | |
description="Search Glean for relevant results.", | |
func=wrapper.run, | |
args_schema=GleanSearchAPISchema, | |
) | |
self.status = "Glean Search API Tool for Langchain" | |
return tool | |
def run_model(self) -> list[Data]: | |
tool = self.build_tool() | |
results = tool.run( | |
{ | |
"query": self.query, | |
"page_size": self.page_size, | |
"request_options": self.request_options, | |
} | |
) | |
# Build the data | |
data = [Data(data=result, text=result["snippets"][0]["text"]) for result in results] | |
self.status = data # type: ignore[assignment] | |
return data | |
def _build_wrapper( | |
self, | |
glean_api_url: str, | |
glean_access_token: str, | |
): | |
return GleanAPIWrapper( | |
glean_api_url=glean_api_url, | |
glean_access_token=glean_access_token, | |
) | |