from typing import List, Dict

import requests
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer, util
from smolagents.tools import Tool


class OdooDocumentationSearchTool(Tool):
    """Agent tool that semantically searches an Odoo documentation page.

    The tool downloads ``https://www.odoo.com/documentation/<version>/``,
    extracts the text of its headings, paragraphs and list items, ranks
    those snippets against the query with sentence embeddings, and then
    re-ranks the best candidates with a simple keyword-count bonus.

    Only that single page is fetched; linked sub-pages are not crawled.
    """

    name = "odoo_documentation_search"

    description = "Searches the Odoo documentation for functional or technical queries and returns related results for a specific Odoo version."

    inputs = {
        "query": {"type": "string", "description": "The search query (e.g., 'how to create a new module')"},
        "version": {"type": "string", "description": "The Odoo version to search (e.g., '16.0', '17.0', '18.0')"},
    }

    output_type = "array"

    # Seconds to wait for the documentation server before giving up, so a
    # stalled connection cannot block the calling agent forever.
    _REQUEST_TIMEOUT = 10

    # Number of embedding-ranked snippets handed to the keyword re-ranker.
    _CANDIDATE_COUNT = 10

    # Number of results returned to the caller.
    _TOP_N = 5

    # Words ignored when extracting keywords from the query.
    _STOP_WORDS = {"a", "an", "the", "is", "are", "in", "on", "at", "to", "for", "of"}

    def __init__(self, query=None):
        """Load the sentence-embedding model used for ranking.

        Args:
            query: Unused; accepted only so existing callers that pass it
                keep working.
        """
        super().__init__()
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.is_initialized = True

    def forward(self, query: str, version: str) -> List[Dict[str, str]]:
        """Search the Odoo documentation page for ``version``.

        Args:
            query: Free-text search query.
            version: Odoo version segment inserted into the documentation
                URL (e.g. ``"17.0"``).

        Returns:
            Up to five dicts with keys ``"Result"`` (snippet text) and
            ``"Score"`` (the re-ranked score rendered as a string), best
            match first. An empty list when the page yields no text.
            A single-element list ``[{"Error": ...}]`` when the HTTP
            request fails.
        """
        base_url = f"https://www.odoo.com/documentation/{version}/"

        # Only the network call can raise RequestException, so keep the
        # try body limited to it.
        try:
            response = requests.get(base_url, timeout=self._REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return [{"Error": f"Error fetching Odoo documentation: {str(e)}"}]

        soup = BeautifulSoup(response.content, "html.parser")

        # Collect snippet texts, skipping empty ones and dropping duplicates
        # (e.g. a <p> nested inside an <li>) while keeping document order.
        texts = (
            element.get_text().strip()
            for element in soup.find_all(['h1', 'h2', 'h3', 'p', 'li'])
        )
        sections = list(dict.fromkeys(text for text in texts if text))

        # Nothing to rank: avoid feeding empty input to the embedding model.
        if not sections:
            return []

        section_embeddings = self.model.encode(sections, convert_to_tensor=True)
        query_embedding = self.model.encode(query, convert_to_tensor=True)

        # Row 0 holds the similarity of the single query against every section.
        cosine_scores = util.pytorch_cos_sim(query_embedding, section_embeddings)[0]

        ranked_sections = sorted(
            zip(sections, cosine_scores),
            key=lambda pair: float(pair[1]),
            reverse=True,
        )

        reranked_sections = self.rerank_sections(
            ranked_sections[:self._CANDIDATE_COUNT], query
        )

        # float() accepts both 0-d tensors and plain numbers.
        return [
            {"Result": section, "Score": str(float(score))}
            for section, score in reranked_sections[:self._TOP_N]
        ]

    def rerank_sections(self, ranked_sections: List[tuple], query: str) -> List[tuple]:
        """Re-rank sections by adding a keyword-occurrence bonus.

        Each section's score is increased by the number of times every
        non-stop-word of the query occurs in it (case-insensitive substring
        count), and the sections are then sorted by the adjusted score.

        Args:
            ranked_sections: ``(section_text, score)`` pairs.
            query: The original search query.

        Returns:
            ``(section_text, adjusted_score)`` pairs, highest score first.
        """
        query_keywords = [
            word for word in query.lower().split() if word not in self._STOP_WORDS
        ]

        reranked_sections = []
        for section, score in ranked_sections:
            # Lower-case once per section rather than once per keyword.
            lowered = section.lower()
            keyword_score = sum(lowered.count(keyword) for keyword in query_keywords)
            reranked_sections.append((section, score + keyword_score))

        reranked_sections.sort(key=lambda pair: float(pair[1]), reverse=True)
        return reranked_sections