Game4all committed on
Commit
cd59c29
·
1 Parent(s): 3beb07e

Add bing search backend

Browse files
Files changed (2) hide show
  1. app.py +30 -1
  2. backends.py +51 -2
app.py CHANGED
@@ -12,7 +12,7 @@ import logging
12
  import re
13
  import uvicorn
14
 
15
- from backends import APISearchResults, APIPatentResults, query_brave_search, query_ddg_search, query_google_patents
16
 
17
  logging.basicConfig(level=logging.INFO)
18
 
@@ -89,6 +89,25 @@ async def search_brave(params: APISearchParams) -> APISearchResults:
89
  return APISearchResults(results=results, error=str(last_exception) if len(results) == 0 and last_exception else None)
90
 
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  @app.post("/search_duck")
93
  async def search_duck(params: APISearchParams) -> APISearchResults:
94
  """Searches duckduckgo for the specified queries and returns the found documents"""
@@ -131,6 +150,16 @@ async def search(params: APISearchParams):
131
  except Exception as e:
132
  logging.error(
133
  f"Failed to query Brave Search with query `{q}`: {e}")
 
 
 
 
 
 
 
 
 
 
134
 
135
  if len(results) == 0:
136
  return APISearchResults(results=[], error="All backends are rate-limited.")
 
12
  import re
13
  import uvicorn
14
 
15
+ from backends import APISearchResults, APIPatentResults, query_bing_search, query_brave_search, query_ddg_search, query_google_patents
16
 
17
  logging.basicConfig(level=logging.INFO)
18
 
 
89
  return APISearchResults(results=results, error=str(last_exception) if len(results) == 0 and last_exception else None)
90
 
91
 
92
@app.post("/search_bing")
async def search_bing(params: APISearchParams) -> APISearchResults:
    """Searches Bing for the specified queries and returns the found documents.

    Every query in ``params.queries`` is sent to ``query_bing_search`` with
    ``params.n_results`` as the per-query result limit, and all hits are
    accumulated into a single list.

    A failing query does not abort the request: the exception is logged,
    remembered, the ``"bing"`` entry of ``backend_status`` is set to
    ``"rate-limited"``, and the loop moves on to the next query.

    Returns:
        An ``APISearchResults`` holding the accumulated results. ``error`` is
        the text of the last exception only when no result at all was
        collected; otherwise it is ``None``.
    """
    results = []
    last_exception: Optional[Exception] = None
    for q in params.queries:
        logging.info(f"Searching Bing search with query `{q}`")
        try:
            # Must hit the Bing backend: this endpoint previously awaited
            # query_brave_search, so "/search_bing" silently returned Brave
            # results and blamed Bing for Brave's failures.
            res = await query_bing_search(pw_browser, q, params.n_results)
            results.extend(res)
        except Exception as e:
            # Best-effort: keep going so the remaining queries still get a chance.
            last_exception = e
            backend_status["bing"] = "rate-limited"
            logging.error(
                f"Failed to query Bing search with query `{q}`: {e}")

    return APISearchResults(results=results, error=str(last_exception) if len(results) == 0 and last_exception else None)
109
+
110
+
111
  @app.post("/search_duck")
112
  async def search_duck(params: APISearchParams) -> APISearchResults:
113
  """Searches duckduckgo for the specified queries and returns the found documents"""
 
150
  except Exception as e:
151
  logging.error(
152
  f"Failed to query Brave Search with query `{q}`: {e}")
153
+ logging.info("Trying with next browser backend.")
154
+
155
+ try:
156
+ logging.info(f"Querying Bing with query: `{q}`")
157
+ res = await query_bing_search(pw_browser, q, params.n_results)
158
+ results.extend(res)
159
+ continue
160
+ except Exception as e:
161
+ logging.error(f"Failed to query Bing search with query `{q}`: {e}")
162
+ logging.info("Trying with next browser backend.")
163
 
164
  if len(results) == 0:
165
  return APISearchResults(results=[], error="All backends are rate-limited.")
backends.py CHANGED
@@ -36,6 +36,7 @@ async def playwright_open_page(browser: Browser):
36
  await context.close()
37
 
38
 
 
39
  async def query_google_patents(browser: Browser, q: str, n_results: int = 10):
40
  """Queries google patents for the specified query and number of results. Returns relevant patents"""
41
 
@@ -93,11 +94,9 @@ async def query_brave_search(browser: Browser, q: str, n_results: int = 10):
93
  results_cards = await page.locator('.snippet').all()
94
 
95
  if len(results_cards) == 0:
96
- logging.warning(f"No results for query: {q}")
97
  page_content = await page.content()
98
 
99
  if "suspicious" in page_content:
100
- logging.warning("Brave search flagged browser as suspicious.")
101
  raise BraveSearchBlockedException()
102
 
103
  results = []
@@ -128,6 +127,56 @@ async def query_brave_search(browser: Browser, q: str, n_results: int = 10):
128
  return results
129
 
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  async def query_ddg_search(q: str, n_results: int = 10):
132
  """Queries duckduckgo search for the specified query"""
133
  ddgs = DDGS()
 
36
  await context.close()
37
 
38
 
39
+ #TODO: update to return same format for results
40
  async def query_google_patents(browser: Browser, q: str, n_results: int = 10):
41
  """Queries google patents for the specified query and number of results. Returns relevant patents"""
42
 
 
94
  results_cards = await page.locator('.snippet').all()
95
 
96
  if len(results_cards) == 0:
 
97
  page_content = await page.content()
98
 
99
  if "suspicious" in page_content:
 
100
  raise BraveSearchBlockedException()
101
 
102
  results = []
 
127
  return results
128
 
129
 
130
async def query_bing_search(browser: Browser, q: str, n_results: int = 10):
    """Queries bing search for the specified query.

    Opens a page through ``playwright_open_page``, loads the Bing results page
    for ``q`` (stylesheet and image requests are aborted), waits for at least
    one ``li.b_algo`` element and scrapes up to ``n_results`` of them.

    Returns:
        A list of dicts with the keys ``"title"``, ``"href"`` and ``"body"``
        (all stripped). Cards without a title or link are left out.
    """
    # Tried in order; the first selector yielding non-blank text wins.
    snippet_selectors = [
        "div.b_caption p",  # typical snippet
        "div.b_caption",    # sometimes snippet is here
        "div.b_snippet",    # used in some result types
        "div.b_text",       # used in some panels
        "p",                # fallback to any paragraph
    ]

    async def _block_resources(route, request):
        # Abort stylesheet/image requests; let everything else through.
        if request.resource_type in ["stylesheet", "image"]:
            await route.abort()
            return
        await route.continue_()

    async with playwright_open_page(browser) as page:
        await page.route("**/*", _block_resources)
        await page.goto(f"https://www.bing.com/search?q={quote_plus(q)}")

        # Raises if no result card shows up; callers treat that as a failure.
        await page.wait_for_selector("li.b_algo")

        found = []
        cards = await page.query_selector_all("li.b_algo")
        for card in cards[:n_results]:
            link = await card.query_selector("h2 > a")
            if link:
                href = await link.get_attribute("href")
                heading = await link.inner_text()
            else:
                href, heading = None, ""

            body = ""
            for css in snippet_selectors:
                node = await card.query_selector(css)
                if not node:
                    continue
                body = await node.inner_text()
                if body.strip():
                    break

            # Only keep cards that have both a title and a target URL.
            if not (heading and href):
                continue
            found.append({
                "title": heading.strip(),
                "href": href.strip(),
                "body": body.strip(),
            })

        return found
178
+
179
+
180
  async def query_ddg_search(q: str, n_results: int = 10):
181
  """Queries duckduckgo search for the specified query"""
182
  ddgs = DDGS()