Add Bing search backend
Browse files- app.py +30 -1
- backends.py +51 -2
app.py
CHANGED
@@ -12,7 +12,7 @@ import logging
|
|
12 |
import re
|
13 |
import uvicorn
|
14 |
|
15 |
-
from backends import APISearchResults, APIPatentResults, query_brave_search, query_ddg_search, query_google_patents
|
16 |
|
17 |
logging.basicConfig(level=logging.INFO)
|
18 |
|
@@ -89,6 +89,25 @@ async def search_brave(params: APISearchParams) -> APISearchResults:
|
|
89 |
return APISearchResults(results=results, error=str(last_exception) if len(results) == 0 and last_exception else None)
|
90 |
|
91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
@app.post("/search_duck")
|
93 |
async def search_duck(params: APISearchParams) -> APISearchResults:
|
94 |
"""Searches duckduckgo for the specified queries and returns the found documents"""
|
@@ -131,6 +150,16 @@ async def search(params: APISearchParams):
|
|
131 |
except Exception as e:
|
132 |
logging.error(
|
133 |
f"Failed to query Brave Search with query `{q}`: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
if len(results) == 0:
|
136 |
return APISearchResults(results=[], error="All backends are rate-limited.")
|
|
|
12 |
import re
|
13 |
import uvicorn
|
14 |
|
15 |
+
from backends import APISearchResults, APIPatentResults, query_bing_search, query_brave_search, query_ddg_search, query_google_patents
|
16 |
|
17 |
logging.basicConfig(level=logging.INFO)
|
18 |
|
|
|
89 |
return APISearchResults(results=results, error=str(last_exception) if len(results) == 0 and last_exception else None)
|
90 |
|
91 |
|
92 |
+
@app.post("/search_bing")
async def search_bing(params: APISearchParams) -> APISearchResults:
    """Searches Bing for the specified queries and returns the found documents.

    Every query in ``params.queries`` is sent to the Bing backend, asking for
    ``params.n_results`` results each; the per-query results are concatenated.

    A failing query does not abort the request: the exception is logged and
    remembered, and the remaining queries are still attempted. The ``error``
    field of the response is only populated when no results were collected
    at all and at least one query raised.
    """
    results = []
    last_exception: Optional[Exception] = None

    for q in params.queries:
        logging.info(f"Searching Bing search with query `{q}`")
        try:
            # Must hit the Bing backend; this endpoint previously called
            # query_brave_search here, so /search_bing returned Brave results.
            res = await query_bing_search(pw_browser, q, params.n_results)
            results.extend(res)
        except Exception as e:
            last_exception = e
            # NOTE(review): any failure is recorded as "rate-limited", not only
            # actual rate limiting - confirm this matches the other endpoints.
            backend_status["bing"] = "rate-limited"
            logging.error(
                f"Failed to query Bing search with query `{q}`: {e}")

    return APISearchResults(results=results, error=str(last_exception) if len(results) == 0 and last_exception else None)
|
109 |
+
|
110 |
+
|
111 |
@app.post("/search_duck")
|
112 |
async def search_duck(params: APISearchParams) -> APISearchResults:
|
113 |
"""Searches duckduckgo for the specified queries and returns the found documents"""
|
|
|
150 |
except Exception as e:
|
151 |
logging.error(
|
152 |
f"Failed to query Brave Search with query `{q}`: {e}")
|
153 |
+
logging.info("Trying with next browser backend.")
|
154 |
+
|
155 |
+
try:
|
156 |
+
logging.info(f"Querying Bing with query: `{q}`")
|
157 |
+
res = await query_bing_search(pw_browser, q, params.n_results)
|
158 |
+
results.extend(res)
|
159 |
+
continue
|
160 |
+
except Exception as e:
|
161 |
+
logging.error(f"Failed to query Bing search with query `{q}`: {e}")
|
162 |
+
logging.info("Trying with next browser backend.")
|
163 |
|
164 |
if len(results) == 0:
|
165 |
return APISearchResults(results=[], error="All backends are rate-limited.")
|
backends.py
CHANGED
@@ -36,6 +36,7 @@ async def playwright_open_page(browser: Browser):
|
|
36 |
await context.close()
|
37 |
|
38 |
|
|
|
39 |
async def query_google_patents(browser: Browser, q: str, n_results: int = 10):
|
40 |
"""Queries google patents for the specified query and number of results. Returns relevant patents"""
|
41 |
|
@@ -93,11 +94,9 @@ async def query_brave_search(browser: Browser, q: str, n_results: int = 10):
|
|
93 |
results_cards = await page.locator('.snippet').all()
|
94 |
|
95 |
if len(results_cards) == 0:
|
96 |
-
logging.warning(f"No results for query: {q}")
|
97 |
page_content = await page.content()
|
98 |
|
99 |
if "suspicious" in page_content:
|
100 |
-
logging.warning("Brave search flagged browser as suspicious.")
|
101 |
raise BraveSearchBlockedException()
|
102 |
|
103 |
results = []
|
@@ -128,6 +127,56 @@ async def query_brave_search(browser: Browser, q: str, n_results: int = 10):
|
|
128 |
return results
|
129 |
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
async def query_ddg_search(q: str, n_results: int = 10):
|
132 |
"""Queries duckduckgo search for the specified query"""
|
133 |
ddgs = DDGS()
|
|
|
36 |
await context.close()
|
37 |
|
38 |
|
39 |
+
# TODO: update to return the same format for results
|
40 |
async def query_google_patents(browser: Browser, q: str, n_results: int = 10):
|
41 |
"""Queries google patents for the specified query and number of results. Returns relevant patents"""
|
42 |
|
|
|
94 |
results_cards = await page.locator('.snippet').all()
|
95 |
|
96 |
if len(results_cards) == 0:
|
|
|
97 |
page_content = await page.content()
|
98 |
|
99 |
if "suspicious" in page_content:
|
|
|
100 |
raise BraveSearchBlockedException()
|
101 |
|
102 |
results = []
|
|
|
127 |
return results
|
128 |
|
129 |
|
130 |
+
async def query_bing_search(browser: Browser, q: str, n_results: int = 10):
    """Scrape the Bing results page for `q` and return the hits found on it.

    At most the first `n_results` result entries (`li.b_algo`) of the page are
    inspected. Each returned hit is a dict with the keys "title", "href" and
    "body"; entries without a title or without a link are dropped, so fewer
    than `n_results` hits may be returned.
    """
    # Places where the snippet text may live, tried in order. The first one
    # yielding non-blank text wins.
    snippet_selectors = (
        "div.b_caption p",  # typical snippet
        "div.b_caption",  # sometimes snippet is here
        "div.b_snippet",  # used in some result types
        "div.b_text",  # used in some panels
        "p",  # fallback to any paragraph
    )

    async def _filter_request(route, request):
        # Stylesheets and images are aborted; every other request goes through.
        if request.resource_type not in ("stylesheet", "image"):
            await route.continue_()
            return
        await route.abort()

    async with playwright_open_page(browser) as page:
        await page.route("**/*", _filter_request)
        await page.goto(f"https://www.bing.com/search?q={quote_plus(q)}")

        # Block until a result entry shows up (Playwright raises on timeout).
        await page.wait_for_selector("li.b_algo")

        hits = []
        cards = await page.query_selector_all("li.b_algo")
        for card in cards[:n_results]:
            anchor = await card.query_selector("h2 > a")
            if anchor:
                link = await anchor.get_attribute("href")
                heading = await anchor.inner_text()
            else:
                link, heading = None, ""

            body = ""
            for css in snippet_selectors:
                node = await card.query_selector(css)
                if not node:
                    continue
                body = await node.inner_text()
                if body.strip():
                    break

            # Keep only entries that have both a title and a target link.
            if not (heading and link):
                continue
            hits.append({
                "title": heading.strip(),
                "href": link.strip(),
                "body": body.strip(),
            })

        return hits
|
178 |
+
|
179 |
+
|
180 |
async def query_ddg_search(q: str, n_results: int = 10):
|
181 |
"""Queries duckduckgo search for the specified query"""
|
182 |
ddgs = DDGS()
|