# NOTE(review): the lines below were Hugging Face Spaces page chrome
# ("Spaces: Running on Zero") captured during export — kept only as a
# comment so the module remains importable.
import asyncio
import html
from datetime import datetime

import gradio as gr
import torch
from duckduckgo_search import DDGS
from transformers import pipeline
# Initialize a lightweight text-generation model (distilgpt2 for speed).
# BUG FIX: device selection must come from torch — gradio has no `cuda`
# attribute, so `gr.cuda.is_available()` raised AttributeError at import.
generator = pipeline(
    "text-generation",
    model="distilgpt2",
    device=0 if torch.cuda.is_available() else -1,
)
# Web search function using DuckDuckGo
async def get_web_results(query: str, max_results: int = 5) -> list:
    """Fetch web results asynchronously for deep research.

    Args:
        query: Search phrase forwarded to DuckDuckGo.
        max_results: Maximum number of hits to return.

    Returns:
        A list of dicts with "title", "snippet", and "url" keys. On any
        failure, a single-element list describing the error is returned so
        callers never need their own try/except.
    """
    try:
        with DDGS() as ddgs:
            # DDGS is synchronous; run it in a worker thread so the event
            # loop is not blocked during the network round-trip.
            results = await asyncio.to_thread(
                lambda: list(ddgs.text(query, max_results=max_results))
            )
        return [
            {
                # Use .get() for every key: the original's r["body"] /
                # r["href"] raised KeyError on a malformed hit, and the
                # broad except below then discarded the whole batch.
                "title": r.get("title", "No Title"),
                "snippet": r.get("body", ""),
                "url": r.get("href", "#"),
            }
            for r in results
        ]
    except Exception as e:
        return [{"title": "Error", "snippet": f"Failed to fetch results: {str(e)}", "url": "#"}]
# Format prompt for the AI model
def format_prompt(query: str, web_results: list) -> str:
    """Assemble the model prompt: timestamp, user query, and web snippets."""
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    bullets = []
    for item in web_results:
        bullets.append(f"- {item['title']}: {item['snippet']}")
    context_block = "\n".join(bullets)
    return (
        f"Time: {stamp}\n"
        f"Query: {query}\n"
        "Web Context:\n"
        f"{context_block}\n"
        "Provide a detailed, well-structured answer in markdown format with citations [1], [2], etc."
    )
# Generate answer using the AI model
def generate_answer(prompt: str) -> str:
    """Generate a detailed research answer, stripping the echoed prompt.

    The text-generation pipeline returns the prompt followed by the
    continuation. The original code located a marker phrase with str.find(),
    which returns -1 on a miss and silently produced a garbled slice;
    slicing off the known prompt prefix avoids that failure mode.
    """
    response = generator(
        prompt,
        # max_new_tokens bounds only the generated text; the original
        # max_length=300 counted prompt tokens too and could truncate or
        # reject a long prompt outright.
        max_new_tokens=200,
        num_return_sequences=1,
        truncation=True,
    )[0]["generated_text"]
    if response.startswith(prompt):
        return response[len(prompt):].strip()
    # Fallback: pipeline did not echo the prompt — return everything.
    return response.strip()
# Format sources for display
def format_sources(web_results: list) -> str:
    """Create an HTML list of sources.

    Titles, snippets, and URLs come from the open web, so every value is
    passed through html.escape before interpolation — the original inlined
    them raw, allowing markup/script injection into the results pane.
    """
    if not web_results:
        return "<div>No sources available</div>"
    sources_html = "<div class='sources-list'>"
    for i, res in enumerate(web_results, 1):
        url = html.escape(res.get("url", "#"), quote=True)
        title = html.escape(res.get("title", "No Title"))
        snippet = html.escape(res.get("snippet", "")[:150])
        sources_html += f"""
        <div class='source-item'>
            <span class='source-number'>[{i}]</span>
            <a href='{url}' target='_blank'>{title}</a>: {snippet}...
        </div>
        """
    sources_html += "</div>"
    return sources_html
# Main processing function
async def process_deep_research(query: str, history: list):
    """Run the research pipeline, yielding progressive UI updates.

    Yields dicts with "answer" (markdown), "sources" (HTML), and "history"
    (chat pairs) at each stage: searching, analyzing, final answer.
    """
    history = history or []

    # Stage 1: immediate feedback while the web search runs.
    yield {
        "answer": "*Searching the web...*",
        "sources": "<div>Fetching sources...</div>",
        "history": history + [[query, "*Searching...*"]],
    }

    # Stage 2: fetch results and render the source list.
    web_results = await get_web_results(query)
    sources_html = format_sources(web_results)
    yield {
        "answer": "*Analyzing results...*",
        "sources": sources_html,
        "history": history + [[query, "*Analyzing...*"]],
    }

    # Stage 3: build the prompt and generate the final answer.
    answer = generate_answer(format_prompt(query, web_results))
    yield {
        "answer": answer,
        "sources": sources_html,
        "history": history + [[query, answer]],
    }
# Custom CSS for a cool, modern UI — dark theme, gradient header, and a
# flex two-column results layout (answer pane 2:1 against the source list).
css = """
body {
    font-family: 'Arial', sans-serif;
    background: #1a1a1a;
    color: #ffffff;
}
.gradio-container {
    max-width: 1000px;
    margin: 0 auto;
    padding: 20px;
}
.header {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #2c3e50, #3498db);
    border-radius: 10px;
    margin-bottom: 20px;
}
.header h1 {
    font-size: 2.5em;
    margin: 0;
    color: #ffffff;
}
.header p {
    color: #bdc3c7;
    font-size: 1.1em;
}
.search-box {
    background: #2c2c2c;
    padding: 15px;
    border-radius: 10px;
    box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3);
}
.search-box input {
    background: #3a3a3a !important;
    color: #ffffff !important;
    border: none !important;
    border-radius: 5px !important;
}
.search-box button {
    background: #3498db !important;
    border: none !important;
    border-radius: 5px !important;
    transition: background 0.3s;
}
.search-box button:hover {
    background: #2980b9 !important;
}
.results-container {
    margin-top: 20px;
    display: flex;
    gap: 20px;
}
.answer-box {
    flex: 2;
    background: #2c2c2c;
    padding: 20px;
    border-radius: 10px;
    box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3);
}
.answer-box .markdown {
    color: #ecf0f1;
    line-height: 1.6;
}
.sources-list {
    flex: 1;
    background: #2c2c2c;
    padding: 15px;
    border-radius: 10px;
    box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3);
}
.source-item {
    margin-bottom: 10px;
}
.source-number {
    color: #3498db;
    font-weight: bold;
    margin-right: 5px;
}
.source-item a {
    color: #3498db;
    text-decoration: none;
}
.source-item a:hover {
    text-decoration: underline;
}
.history-box {
    margin-top: 20px;
    background: #2c2c2c;
    padding: 15px;
    border-radius: 10px;
    max-height: 300px;
    overflow-y: auto;
    box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3);
}
"""
# Gradio app setup with Blocks for better control
with gr.Blocks(title="Deep Research Engine", css=css) as demo:
    history_state = gr.State([])

    # Header
    with gr.Column(elem_classes="header"):
        gr.Markdown("# Deep Research Engine")
        gr.Markdown("Your gateway to in-depth answers with real-time web insights.")

    # Search input and button
    with gr.Row(elem_classes="search-box"):
        search_input = gr.Textbox(label="", placeholder="Ask anything...", lines=2)
        search_btn = gr.Button("Research", variant="primary")

    # Results layout: answer pane and source list side by side
    with gr.Row(elem_classes="results-container"):
        with gr.Column():
            answer_output = gr.Markdown(label="Research Findings", elem_classes="answer-box")
        with gr.Column():
            sources_output = gr.HTML(label="Sources", elem_classes="sources-list")

    # Chat history
    with gr.Row():
        history_display = gr.Chatbot(label="History", elem_classes="history-box")

    # Event handling: stream each pipeline stage to the three outputs.
    async def handle_search(query, history):
        async for step in process_deep_research(query, history):
            yield step["answer"], step["sources"], step["history"]

    # BUG FIX: the original passed _js="() => [...]" to .click(). That
    # keyword is not accepted by current Gradio event listeners (renamed to
    # `js`), and the snippet replaced the declared inputs with
    # [textbox value, null] — nulling history_state on every click. The
    # `inputs` list already supplies both values, so no JS hook is needed.
    search_btn.click(
        fn=handle_search,
        inputs=[search_input, history_state],
        outputs=[answer_output, sources_output, history_display],
    ).then(
        fn=lambda h: h,  # persist the displayed chat back into the State
        inputs=[history_display],
        outputs=[history_state],
    )
    search_input.submit(
        fn=handle_search,
        inputs=[search_input, history_state],
        outputs=[answer_output, sources_output, history_display],
    ).then(
        fn=lambda h: h,
        inputs=[history_display],
        outputs=[history_state],
    )
# Launch the app
# Entry point: start the Gradio server (blocks until interrupted).
if __name__ == "__main__":
    demo.launch()