GitsSaikat committed
Update deep_research.py
research/deep_research.py CHANGED (+13 -6)
@@ -89,14 +89,17 @@ async def generate_search_queries_async(session, user_query):
             return []
     return []
 
-async def perform_search_async(session, query):
+# Modify perform_search_async function
+async def perform_search_async(session, query, result_limit=5):
     """
     Make an asynchronous SERPAPI call to perform a Google search for the provided query.
+    result_limit: Maximum number of search results to return
     """
     params = {
         "q": query,
         "api_key": SERPAPI_API_KEY,
-        "engine": "google"
+        "engine": "google",
+        "num": result_limit  # Add this parameter for limiting results
     }
     try:
         async with session.get(SERPAPI_URL, params=params) as resp:
@@ -104,7 +107,7 @@ async def perform_search_async(session, query):
             results = await resp.json()
             if "organic_results" in results:
                 links = [item.get("link") for item in results["organic_results"] if "link" in item]
-                return links
+                return links[:result_limit]  # Ensure we don't exceed the limit
             else:
                 print("No organic results found in SERPAPI response.")
                 return []
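For reference, a consolidated sketch of perform_search_async once both hunks above are applied. The SERPAPI_URL and SERPAPI_API_KEY values here are placeholder stand-ins for the module-level constants defined elsewhere in deep_research.py, and the except branch is assumed, since the diff only shows the try: line.

import aiohttp

# Placeholder stand-ins for constants defined elsewhere in deep_research.py
SERPAPI_URL = "https://serpapi.com/search"
SERPAPI_API_KEY = "your-serpapi-key"

async def perform_search_async(session, query, result_limit=5):
    """
    Make an asynchronous SERPAPI call to perform a Google search for the provided query.
    result_limit: Maximum number of search results to return
    """
    params = {
        "q": query,
        "api_key": SERPAPI_API_KEY,
        "engine": "google",
        "num": result_limit,  # ask SerpAPI for at most this many results
    }
    try:
        async with session.get(SERPAPI_URL, params=params) as resp:
            results = await resp.json()
            if "organic_results" in results:
                links = [item.get("link") for item in results["organic_results"] if "link" in item]
                return links[:result_limit]  # defensive slice in case more come back
            print("No organic results found in SERPAPI response.")
            return []
    except Exception as e:  # error handling assumed; the hunk only shows try:
        print(f"SERPAPI request failed: {e}")
        return []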
@@ -274,9 +277,11 @@ async def process_link(session, link, user_query, search_query):
         return SourcedContext(context, link)
     return None
 
-async def research_flow(user_query, iteration_limit):
+# Modify research_flow function to accept search_limit parameter
+async def research_flow(user_query, iteration_limit, search_limit=5):
     """
     Primary research procedure intended for integration with Streamlit.
+    search_limit: Maximum number of search results per query
     """
     sourced_contexts = []
     all_search_queries = []
@@ -292,9 +297,11 @@ async def research_flow(user_query, iteration_limit):
             print(f"\n--- Iteration {iteration + 1} ---")
             iteration_contexts = []
 
-            search_tasks = [perform_search_async(session, query) for query in new_search_queries]
+            # Update to include search_limit
+            search_tasks = [perform_search_async(session, query, search_limit) for query in new_search_queries]
             search_results = await asyncio.gather(*search_tasks)
 
+
             unique_links = {}
             for idx, links in enumerate(search_results):
                 query = new_search_queries[idx]
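The loop body above fans the capped searches out concurrently with asyncio.gather. A minimal sketch of that pattern in isolation; fan_out_searches is a hypothetical helper name, the query list is supplied directly by the caller rather than by the query-generation step of research_flow, and the dedup loop mirrors the enumerate(search_results) loop shown in the hunk:

import asyncio
import aiohttp

async def fan_out_searches(new_search_queries, search_limit=5):
    # Hypothetical helper mirroring the loop body above: one capped SerpAPI
    # search per query, all run concurrently with asyncio.gather.
    async with aiohttp.ClientSession() as session:
        search_tasks = [perform_search_async(session, query, search_limit)
                        for query in new_search_queries]
        search_results = await asyncio.gather(*search_tasks)

    # Deduplicate links, keeping the first query that produced each one,
    # as the enumerate(search_results) loop above goes on to do.
    unique_links = {}
    for idx, links in enumerate(search_results):
        query = new_search_queries[idx]
        for link in links:
            unique_links.setdefault(link, query)
    return unique_links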
@@ -352,4 +359,4 @@ def main():
     print(final_report)
 
 if __name__ == "__main__":
-    main()
+    main()
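With these changes, a caller such as main() or the Streamlit app can cap the number of links fetched per query. A hedged usage sketch, assuming research_flow returns the final report text that main() prints; the query string is only an example:

import asyncio

# Run three research iterations, capped at 3 links per search query.
report = asyncio.run(research_flow(
    "history of the transformer architecture",
    iteration_limit=3,
    search_limit=3,
))
print(report)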