Update app.py

app.py
CHANGED
@@ -42,7 +42,7 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
     start = 0
     all_results = []
     max_chars_per_page = 8000  # Limit the number of characters from each webpage to stay under the token limit
-
+
     with requests.Session() as session:
        while start < num_results:
            print(f"Fetching search results starting from: {start}")
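The context above is the pagination loop inside google_search: start walks through result pages until num_results links have been collected. A minimal runnable sketch of that pattern; the fetch_page helper and the way start advances are illustrative assumptions, not code from this commit:

import requests

def paginate(term, num_results, fetch_page):
    # Collect results page by page until num_results is reached.
    start = 0
    all_results = []
    with requests.Session() as session:
        while start < num_results:
            page = fetch_page(session, term, start)  # hypothetical page fetcher
            if not page:
                break  # no more results to fetch
            all_results.extend(page)
            start += len(page)  # advance past the results just parsed
    return all_results[:num_results]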
@@ -53,7 +53,7 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
                'User-Agent': user_agent
            }
            print(f"Using User-Agent: {headers['User-Agent']}")
-
+
            resp = session.get(
                url="https://www.google.com/search",
                headers=headers,
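The session.get(...) call above can be exercised on its own. A self-contained sketch; only the URL, headers, and timeout appear in this diff, so the params dict (q, hl, safe, start, num) is an assumption based on google_search's signature:

import requests

term, lang, safe, start, num_results, timeout = "ACME earnings", "en", "active", 0, 5, 5
headers = {"User-Agent": "Mozilla/5.0"}  # user_agent is built elsewhere in app.py
with requests.Session() as session:
    resp = session.get(
        url="https://www.google.com/search",
        headers=headers,
        params={"q": term, "hl": lang, "safe": safe, "start": start, "num": num_results},
        timeout=timeout,
    )
    print(resp.status_code)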
@@ -71,7 +71,7 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
            except requests.exceptions.RequestException as e:
                print(f"Error fetching search results: {e}")
                break
-
+
            soup = BeautifulSoup(resp.text, "html.parser")
            result_block = soup.find_all("div", attrs={"class": "g"})
            if not result_block:
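soup.find_all("div", attrs={"class": "g"}) yields one block per organic result. A sketch of how each block might be reduced to the {"link", "text"} dicts consumed by format_prompt below; the anchor/text selectors and where the 8000-character cap is applied are assumptions:

from bs4 import BeautifulSoup

html = '<div class="g"><a href="https://example.com">ACME</a><div>ACME beat estimates.</div></div>'
soup = BeautifulSoup(html, "html.parser")
results = []
for block in soup.find_all("div", attrs={"class": "g"}):
    a = block.find("a", href=True)
    results.append({
        "link": a["href"] if a else None,
        "text": block.get_text(" ", strip=True)[:8000],  # max_chars_per_page cap
    })
print(results)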
@@ -106,9 +106,9 @@ def format_prompt(query, search_results, instructions):
        link = result["link"]
        text = result["text"]
        if link:
-            formatted_results += f"URL: {link}\nContent: {text}\n{'-'*80}\n"
+            formatted_results += f"URL: {link}\nContent: {text}\n{'-'*80}\n"
        else:
-            formatted_results += "No link found.\n" + '-'*80 + '\n'
+            formatted_results += "No link found.\n" + '-'*80 + '\n'

    prompt = f"{instructions}User Query: {query}\n\nWeb Search Results:\n{formatted_results}\n\nAssistant:"
    return prompt
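For reference, format_prompt can be run standalone. This sketch mirrors the hunk, assuming each search result is a dict with "link" and "text" keys as the surrounding lines imply:

def format_prompt(query, search_results, instructions):
    formatted_results = ""
    for result in search_results:
        link = result["link"]
        text = result["text"]
        if link:
            formatted_results += f"URL: {link}\nContent: {text}\n{'-'*80}\n"
        else:
            formatted_results += "No link found.\n" + '-'*80 + '\n'
    return f"{instructions}User Query: {query}\n\nWeb Search Results:\n{formatted_results}\n\nAssistant:"

print(format_prompt("ACME earnings", [{"link": "https://example.com", "text": "ACME beat estimates."}], "Summarize. "))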
@@ -232,101 +232,55 @@ def save_text_to_pdf(text, output_path):
    doc.save(output_path)  # Save the PDF to the specified path
    print("PDF saved successfully.")

-def get_predefined_queries(company):
-    return [
-        f"Recent earnings for {company}",
-        f"Recent News on {company}",
-        f"Recent Credit rating of {company}",
-        f"Recent conference call transcript of {company}"
-    ]
-
-
 # Integrated function to perform web scraping, formatting, and text generation
-def scrape_and_display(query, num_results,
-                       credit_rating_instructions, conference_call_instructions, final_instructions,
-                       web_search=True, temperature=0.7, repetition_penalty=1.0, top_p=0.9):
+def scrape_and_display(query, num_results, instructions, web_search=True, temperature=0.7, repetition_penalty=1.0, top_p=0.9):
    print(f"Scraping and displaying results for query: {query} with num_results: {num_results}")
-
    if web_search:
-
-
-
-        all_summaries = []
-
-        instructions = [earnings_instructions, news_instructions, credit_rating_instructions, conference_call_instructions]
-
-        for pq, instruction in zip(predefined_queries, instructions):
-            search_results = google_search(pq, num_results=num_results // len(predefined_queries))
-            all_results.extend(search_results)
-
-            # Generate a summary for each predefined query
-            formatted_prompt = format_prompt(pq, search_results, instruction)
-            summary = generate_text(formatted_prompt, temperature=temperature, repetition_penalty=repetition_penalty, top_p=top_p)
-            all_summaries.append(summary)
-
-        # Combine all summaries
-        combined_summary = "\n\n".join(all_summaries)
-
-        # Generate final summary using the combined results and final instructions
-        final_prompt = f"{final_instructions}\n\nHere are the summaries for each aspect of {company}:\n\n{combined_summary}\n\nPlease provide a comprehensive summary based on the above information:"
-        generated_summary = generate_text(final_prompt, temperature=temperature, repetition_penalty=repetition_penalty, top_p=top_p)
+        search_results = google_search(query, num_results)
+        formatted_prompt = format_prompt(query, search_results, instructions)
+        generated_summary = generate_text(formatted_prompt, temperature=temperature, repetition_penalty=repetition_penalty, top_p=top_p)
    else:
-        formatted_prompt = format_prompt_with_instructions(query,
+        formatted_prompt = format_prompt_with_instructions(query, instructions)
        generated_summary = generate_text(formatted_prompt, temperature=temperature, repetition_penalty=repetition_penalty, top_p=top_p)
-
    print("Scraping and display complete.")
    if generated_summary:
+        # Extract and return text starting from "Assistant:"
        assistant_index = generated_summary.find("Assistant:")
        if assistant_index != -1:
            generated_summary = generated_summary[assistant_index:]
        else:
            generated_summary = "Assistant: No response generated."
-        print(f"Generated summary: {generated_summary}")
+        print(f"Generated summary: {generated_summary}")  # Debugging line
    return generated_summary

-
 # Main Gradio interface function
-def gradio_interface(query, use_pdf, pdf, num_results,
-                     credit_rating_instructions, conference_call_instructions, final_instructions,
-                     temperature, repetition_penalty, top_p):
+def gradio_interface(query, use_pdf, pdf, num_results, instructions, temperature, repetition_penalty, top_p):
    if use_pdf and pdf is not None:
        pdf_text = read_pdf(pdf)
-        generated_summary = scrape_and_display(pdf_text, num_results=0, instructions=
-                                               web_search=False, temperature=temperature,
-                                               repetition_penalty=repetition_penalty, top_p=top_p)
+        generated_summary = scrape_and_display(pdf_text, num_results=0, instructions=instructions, web_search=False, temperature=temperature, repetition_penalty=repetition_penalty, top_p=top_p)
    else:
-        generated_summary = scrape_and_display(query, num_results=num_results,
-
-
-            credit_rating_instructions=credit_rating_instructions,
-            conference_call_instructions=conference_call_instructions,
-            final_instructions=final_instructions,
-            web_search=True, temperature=temperature,
-            repetition_penalty=repetition_penalty, top_p=top_p)
-
+        generated_summary = scrape_and_display(query, num_results=num_results, instructions=instructions, web_search=True, temperature=temperature, repetition_penalty=repetition_penalty, top_p=top_p)
+
+    # Save the generated summary to a PDF
    output_pdf_path = "output_summary.pdf"
    save_text_to_pdf(generated_summary, output_pdf_path)
-
+
    return generated_summary, output_pdf_path

-#
+# Deploy Gradio Interface
 gr.Interface(
    fn=gradio_interface,
    inputs=[
-        gr.Textbox(label="
+        gr.Textbox(label="Query"),
        gr.Checkbox(label="Use PDF"),
        gr.File(label="Upload PDF"),
-        gr.Slider(minimum=
-        gr.Textbox(label="
-        gr.
-        gr.
-        gr.
-        gr.Textbox(label="Final Summary Instructions", lines=2, placeholder="Instructions for the final summary..."),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature"),
-        gr.Slider(minimum=1.0, maximum=2.0, value=1.0, label="Repetition Penalty"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top p")
+        gr.Slider(minimum=1, maximum=20, label="Number of Results"),  # Added Slider for num_results
+        gr.Textbox(label="Instructions"),
+        gr.Slider(minimum=0.1, maximum=1.0, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, label="Repetition Penalty"),
+        gr.Slider(minimum=0.1, maximum=1.0, label="Top p")
    ],
-    outputs=["text", "file"],
+    outputs=["text", "file"],  # Updated to return text and a file
    title="Financial Analyst AI Assistant",
-    description="Enter a company
-)
+    description="Enter your query about a company's financials to get valuable insights. Optionally, upload a PDF for analysis. Please provide instructions for curating your output template; for web search you can adjust the number of results, but it is advisable to keep it at 10. You can also adjust parameters like Temperature, Repetition Penalty and Top_P; it is advisable to set Repetition Penalty to 1 and the other two to 0.1.",
+).launch(share=True)
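The added lines keep only the text from the "Assistant:" marker onward before returning. That extraction step in isolation (the sample string is illustrative):

generated_summary = "echoed prompt text\nAssistant: ACME revenue grew 12% year over year."
assistant_index = generated_summary.find("Assistant:")
if assistant_index != -1:
    generated_summary = generated_summary[assistant_index:]  # keep from the marker onward
else:
    generated_summary = "Assistant: No response generated."
print(generated_summary)  # Assistant: ACME revenue grew 12% year over year.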