sagar007 committed on
Commit
12a0d68
·
verified ·
1 Parent(s): 6c1f2d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -54
app.py CHANGED
@@ -1,53 +1,77 @@
1
  import gradio as gr
2
  import spaces # Required for ZeroGPU
3
- from transformers import pipeline
4
  from duckduckgo_search import DDGS
5
  from datetime import datetime
6
 
7
  # Initialize a lightweight text generation model on CPU
8
  generator = pipeline("text-generation", model="distilgpt2", device=-1) # -1 ensures CPU by default
 
9
 
10
  # Web search function (CPU-based)
11
- def get_web_results(query: str, max_results: int = 3) -> list:
12
- """Fetch web results synchronously for Zero GPU compatibility."""
13
  try:
14
  with DDGS() as ddgs:
15
  results = list(ddgs.text(query, max_results=max_results))
16
- return [{"title": r.get("title", "No Title"), "snippet": r["body"], "url": r["href"]} for r in results]
 
 
 
 
 
 
 
17
  except Exception as e:
18
  return [{"title": "Error", "snippet": f"Failed to fetch results: {str(e)}", "url": "#"}]
19
 
20
- # Format prompt for the AI model (CPU-based)
21
  def format_prompt(query: str, web_results: list) -> str:
22
- """Create a concise prompt with web context."""
23
- current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
24
  context = "\n".join([f"- {r['title']}: {r['snippet']}" for r in web_results])
25
- return f"""Time: {current_time}
26
  Query: {query}
27
- Web Context:
28
  {context}
29
- Provide a concise answer in markdown format with citations [1], [2], etc."""
30
 
31
- # GPU-decorated answer generation
32
- @spaces.GPU(duration=120) # Allow up to 120 seconds of GPU time
33
  def generate_answer(prompt: str) -> str:
34
- """Generate a concise research answer using GPU."""
35
- # Use max_new_tokens instead of max_length to allow new token generation
36
- response = generator(prompt, max_new_tokens=150, num_return_sequences=1, truncation=True)[0]["generated_text"]
37
- answer_start = response.find("Provide a concise") + len("Provide a concise answer in markdown format with citations [1], [2], etc.")
38
- return response[answer_start:].strip() if answer_start > -1 else "No detailed answer generated."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
- # Format sources for display (CPU-based)
41
  def format_sources(web_results: list) -> str:
42
- """Create a simple HTML list of sources."""
43
  if not web_results:
44
- return "<div>No sources available</div>"
 
45
  sources_html = "<div class='sources-list'>"
46
  for i, res in enumerate(web_results, 1):
47
  sources_html += f"""
48
  <div class='source-item'>
49
  <span class='source-number'>[{i}]</span>
50
- <a href='{res['url']}' target='_blank'>{res['title']}</a>: {res['snippet'][:100]}...
 
51
  </div>
52
  """
53
  sources_html += "</div>"
@@ -55,7 +79,7 @@ def format_sources(web_results: list) -> str:
55
 
56
  # Main processing function
57
  def process_deep_research(query: str, history: list):
58
- """Handle the deep research process."""
59
  if not history:
60
  history = []
61
 
@@ -72,7 +96,7 @@ def process_deep_research(query: str, history: list):
72
 
73
  return answer, sources_html, new_history
74
 
75
- # Custom CSS for a cool, lightweight UI
76
  css = """
77
  body {
78
  font-family: 'Arial', sans-serif;
@@ -80,68 +104,112 @@ body {
80
  color: #ffffff;
81
  }
82
  .gradio-container {
83
- max-width: 900px;
84
  margin: 0 auto;
85
- padding: 15px;
86
  }
87
  .header {
88
  text-align: center;
89
- padding: 15px;
90
  background: linear-gradient(135deg, #2c3e50, #3498db);
91
- border-radius: 8px;
92
- margin-bottom: 15px;
 
93
  }
94
- .header h1 { font-size: 2em; margin: 0; color: #ffffff; }
95
- .header p { color: #bdc3c7; font-size: 1em; }
96
  .search-box {
97
  background: #2c2c2c;
98
- padding: 10px;
99
- border-radius: 8px;
100
- box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
 
101
  }
102
  .search-box input {
103
- background: #3a3a3a !important;
104
  color: #ffffff !important;
105
  border: none !important;
106
- border-radius: 5px !important;
 
 
107
  }
108
  .search-box button {
109
  background: #3498db !important;
110
  border: none !important;
111
- border-radius: 5px !important;
 
 
 
112
  }
 
113
  .results-container {
114
- margin-top: 15px;
115
  display: flex;
116
- gap: 15px;
117
  }
118
  .answer-box {
119
  flex: 2;
120
  background: #2c2c2c;
121
- padding: 15px;
122
- border-radius: 8px;
123
- box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
 
 
124
  }
125
- .answer-box .markdown { color: #ecf0f1; line-height: 1.5; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  .sources-list {
127
  flex: 1;
128
  background: #2c2c2c;
 
 
 
 
 
 
 
 
 
129
  padding: 10px;
 
130
  border-radius: 8px;
131
- box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
132
  }
133
- .source-item { margin-bottom: 8px; }
134
- .source-number { color: #3498db; font-weight: bold; margin-right: 5px; }
135
- .source-item a { color: #3498db; text-decoration: none; }
136
- .source-item a:hover { text-decoration: underline; }
137
  .history-box {
138
- margin-top: 15px;
139
  background: #2c2c2c;
140
- padding: 10px;
141
- border-radius: 8px;
142
- max-height: 250px;
143
  overflow-y: auto;
144
- box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
145
  }
146
  """
147
 
@@ -152,11 +220,11 @@ with gr.Blocks(title="Deep Research Engine - ZeroGPU", css=css) as demo:
152
  # Header
153
  with gr.Column(elem_classes="header"):
154
  gr.Markdown("# Deep Research Engine")
155
- gr.Markdown("Fast, in-depth answers powered by web insights (ZeroGPU).")
156
 
157
  # Search input and button
158
  with gr.Row(elem_classes="search-box"):
159
- search_input = gr.Textbox(label="", placeholder="Ask anything...", lines=2)
160
  search_btn = gr.Button("Research", variant="primary")
161
 
162
  # Results layout
 
1
  import gradio as gr
2
  import spaces # Required for ZeroGPU
3
+ from transformers import pipeline, AutoTokenizer
4
  from duckduckgo_search import DDGS
5
  from datetime import datetime
6
 
7
  # Initialize a lightweight text generation model on CPU
8
  generator = pipeline("text-generation", model="distilgpt2", device=-1) # -1 ensures CPU by default
9
+ tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased") # For better token handling
10
 
11
  # Web search function (CPU-based)
12
def get_web_results(query: str, max_results: int = 5) -> list:
    """Fetch web search results, preferring a short list of trusted sites.

    Runs a synchronous DuckDuckGo text search through ``DDGS().text`` and
    converts every hit into a dict with the keys ``title``, ``snippet`` and
    ``url``.  Hits hosted on a preferred site or on an ``.edu`` domain are
    returned when at least one exists; otherwise all hits are returned.

    Args:
        query: Search string passed to DuckDuckGo.
        max_results: Maximum number of hits to request.

    Returns:
        A list of ``{"title", "snippet", "url"}`` dicts.  If the search
        fails for any reason, a single placeholder entry titled ``"Error"``
        describing the failure is returned instead of raising.
    """
    # Local import: only this function needs URL parsing.
    from urllib.parse import urlparse

    preferred_domains = (
        "geeksforgeeks.org",
        "realpython.com",
        "coursera.org",
        "udemy.com",
        "stackexchange.com",
    )

    def is_preferred(url: str) -> bool:
        # Compare against the host name only, so a domain that merely
        # appears in a path or query string (or a word such as "reduce"
        # containing "edu") does not count as a trusted source.
        host = (urlparse(url).hostname or "").lower()
        if any(host == d or host.endswith("." + d) for d in preferred_domains):
            return True
        return host.endswith(".edu") or ".edu." in host

    try:
        with DDGS() as ddgs:
            raw_results = list(ddgs.text(query, max_results=max_results))

        # Normalize *every* hit first so the fallback below has the same
        # shape as the filtered list; callers index 'snippet' and 'url'.
        normalized = [
            {
                "title": r.get("title", "No Title"),
                "snippet": r.get("body", ""),
                "url": r.get("href", "#"),
            }
            for r in raw_results
        ]
        preferred = [r for r in normalized if is_preferred(r["url"])]
        # Fall back to all results if no preferred source was found.
        return preferred or normalized
    except Exception as e:
        # Best-effort boundary: surface the failure as a displayable entry.
        return [{"title": "Error", "snippet": f"Failed to fetch results: {str(e)}", "url": "#"}]
27
 
28
# Format prompt for the AI model to generate high-quality, structured answers
def format_prompt(query: str, web_results: list) -> str:
    """Build the generation prompt from the query and the web results.

    The prompt has four parts, one per line group: a timestamp, the query,
    one ``- title: snippet`` bullet per web result, and a fixed instruction
    paragraph that begins with ``"Provide a detailed"``.

    Args:
        query: The user's question.
        web_results: Result dicts; ``title`` and ``snippet`` are read from
            each.  A raw search hit that carries ``body`` instead of
            ``snippet`` is accepted as well.

    Returns:
        The complete prompt string.
    """
    # Local import: the file only imports ``datetime`` from this module.
    from datetime import timedelta, timezone

    # The label says IST, so compute the time in IST (UTC+05:30) explicitly
    # instead of labelling whatever the server's local clock happens to be.
    ist = timezone(timedelta(hours=5, minutes=30))
    current_time = datetime.now(ist).strftime("%Y-%m-%d %H:%M:%S IST")

    bullets = []
    for r in web_results:
        title = r.get("title", "No Title")
        snippet = r.get("snippet", r.get("body", ""))
        bullets.append(f"- {title}: {snippet}")
    context = "\n".join(bullets)

    instructions = (
        "Provide a detailed, step-by-step answer in markdown format with clear headings "
        "(e.g., #, ##), bullet points, and citations [1], [2], etc. "
        "Ensure the answer is structured, relevant, and visually appealing, "
        "addressing the user's intent comprehensively. "
        "If the query is informational (e.g., 'what,' 'how,' 'why'), "
        "offer in-depth insights, examples, and practical advice. "
        "If no high-quality answer is possible, state, "
        "'I couldn’t find sufficient high-quality information to provide a detailed answer, "
        "but here’s what I found:' followed by a summary of web results."
    )

    return "\n".join(
        [
            f"Current Time: {current_time}",
            f"Query: {query}",
            "Web Context (High-Quality Sources):",
            context,
            instructions,
        ]
    )
38
 
39
+ # GPU-decorated answer generation for high-quality output
40
@spaces.GPU(duration=180)  # Increased duration for more detailed generation
def generate_answer(prompt: str) -> str:
    """Generate an answer for ``prompt`` and strip the echoed prompt text.

    The module-level ``generator`` pipeline is called with sampling enabled.
    Its ``generated_text`` is searched for the instruction paragraph that
    ends the prompt, and everything after that paragraph is returned.

    Args:
        prompt: Prompt text whose final paragraph starts with
            ``"Provide a detailed"`` and ends with
            ``"followed by a summary of web results."``.

    Returns:
        The text generated after the instruction paragraph.  If that
        paragraph cannot be located in the output, or nothing follows it,
        a fallback message is returned followed by the web-context bullets
        taken from ``prompt`` itself.
    """
    instruction_start = "Provide a detailed"
    instruction_end = "followed by a summary of web results."
    context_header = "Web Context (High-Quality Sources):\n"
    fallback_intro = (
        "I couldn’t find sufficient high-quality information to provide a detailed answer, "
        "but here’s what I found:\n\n"
    )

    # The pipeline tokenizes the prompt itself, so no separate
    # pre-tokenization step is performed here.
    response = generator(
        prompt,
        max_new_tokens=400,  # Increased for more detailed output
        num_return_sequences=1,
        truncation=True,
        do_sample=True,
        temperature=0.7,  # Controlled randomness for coherent, detailed output
        top_p=0.9,  # Focus on top probabilities for quality
        top_k=50,  # Limit to top 50 tokens for better coherence
    )[0]["generated_text"]

    # Check each find() result against -1 *before* adding a length to it;
    # adding first would hide the "not found" case.
    start_pos = response.find(instruction_start)
    if start_pos != -1:
        end_pos = response.find(instruction_end, start_pos)
        if end_pos != -1:
            answer = response[end_pos + len(instruction_end):].strip()
            if answer:
                return answer

    # Fallback: reuse the context bullets already embedded in the prompt
    # rather than searching again (the original query is not available here).
    header_pos = prompt.find(context_header)
    tail_pos = prompt.rfind(instruction_start)
    summary = ""
    if header_pos != -1 and tail_pos > header_pos:
        summary = prompt[header_pos + len(context_header):tail_pos].strip()
    return (fallback_intro + summary).strip()
61
 
62
+ # Format sources for display with enhanced styling
63
  def format_sources(web_results: list) -> str:
64
+ """Create a styled HTML list of sources with better visuals for high-quality presentation."""
65
  if not web_results:
66
+ return "<div class='no-sources'>No sources available</div>"
67
+
68
  sources_html = "<div class='sources-list'>"
69
  for i, res in enumerate(web_results, 1):
70
  sources_html += f"""
71
  <div class='source-item'>
72
  <span class='source-number'>[{i}]</span>
73
+ <a href='{res['url']}' target='_blank' class='source-link'>{res['title']}</a>
74
+ <p class='source-snippet'>{res['snippet'][:150]}...</p>
75
  </div>
76
  """
77
  sources_html += "</div>"
 
79
 
80
  # Main processing function
81
  def process_deep_research(query: str, history: list):
82
+ """Handle the deep research process for any query with high-quality output."""
83
  if not history:
84
  history = []
85
 
 
96
 
97
  return answer, sources_html, new_history
98
 
99
+ # Enhanced CSS for a polished, visually appealing UI
100
  css = """
101
  body {
102
  font-family: 'Arial', sans-serif;
 
104
  color: #ffffff;
105
  }
106
  .gradio-container {
107
+ max-width: 1200px;
108
  margin: 0 auto;
109
+ padding: 20px;
110
  }
111
  .header {
112
  text-align: center;
113
+ padding: 20px;
114
  background: linear-gradient(135deg, #2c3e50, #3498db);
115
+ border-radius: 12px;
116
+ margin-bottom: 20px;
117
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
118
  }
119
+ .header h1 { font-size: 2.5em; margin: 0; color: #ffffff; text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.5); }
120
+ .header p { color: #bdc3c7; font-size: 1.1em; }
121
  .search-box {
122
  background: #2c2c2c;
123
+ padding: 15px;
124
+ border-radius: 12px;
125
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
126
+ margin-bottom: 20px;
127
  }
128
  .search-box input {
129
+ background: #3a3a3e !important;
130
  color: #ffffff !important;
131
  border: none !important;
132
+ border-radius: 8px !important;
133
+ padding: 10px;
134
+ font-size: 1em;
135
  }
136
  .search-box button {
137
  background: #3498db !important;
138
  border: none !important;
139
+ border-radius: 8px !important;
140
+ padding: 10px 20px;
141
+ font-size: 1em;
142
+ transition: background 0.3s;
143
  }
144
+ .search-box button:hover { background: #2980b9 !important; }
145
  .results-container {
146
+ margin-top: 20px;
147
  display: flex;
148
+ gap: 20px;
149
  }
150
  .answer-box {
151
  flex: 2;
152
  background: #2c2c2c;
153
+ padding: 20px;
154
+ border-radius: 12px;
155
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
156
+ overflow-y: auto;
157
+ max-height: 600px;
158
  }
159
+ .answer-box .markdown {
160
+ color: #ecf0f1;
161
+ line-height: 1.6;
162
+ }
163
+ .answer-box .markdown h1 {
164
+ color: #ffffff;
165
+ border-bottom: 2px solid #3498db;
166
+ padding-bottom: 10px;
167
+ }
168
+ .answer-box .markdown h2 {
169
+ color: #a8b5c3;
170
+ margin-top: 20px;
171
+ }
172
+ .answer-box .markdown ul {
173
+ list-style-type: none;
174
+ padding-left: 20px;
175
+ }
176
+ .answer-box .markdown ul li::before {
177
+ content: "•";
178
+ color: #3498db;
179
+ display: inline-block;
180
+ width: 1em;
181
+ margin-left: -1em;
182
+ }
183
+ .answer-box .markdown a { color: #60a5fa; text-decoration: underline; }
184
  .sources-list {
185
  flex: 1;
186
  background: #2c2c2c;
187
+ padding: 15px;
188
+ border-radius: 12px;
189
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
190
+ max-height: 600px;
191
+ overflow-y: auto;
192
+ }
193
+ .no-sources { color: #a8a9ab; font-style: italic; }
194
+ .source-item {
195
+ margin-bottom: 15px;
196
  padding: 10px;
197
+ background: #3a3a3e;
198
  border-radius: 8px;
199
+ transition: background 0.2s;
200
  }
201
+ .source-item:hover { background: #4a4b4e; }
202
+ .source-number { color: #3498db; font-weight: bold; margin-right: 10px; }
203
+ .source-link { color: #60a5fa; font-weight: 500; display: block; margin-bottom: 5px; }
204
+ .source-snippet { color: #e5e7eb; font-size: 0.9em; line-height: 1.4; }
205
  .history-box {
206
+ margin-top: 20px;
207
  background: #2c2c2c;
208
+ padding: 15px;
209
+ border-radius: 12px;
210
+ max-height: 300px;
211
  overflow-y: auto;
212
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
213
  }
214
  """
215
 
 
220
  # Header
221
  with gr.Column(elem_classes="header"):
222
  gr.Markdown("# Deep Research Engine")
223
+ gr.Markdown("Your gateway to in-depth, high-quality research for any query with real-time web insights.")
224
 
225
  # Search input and button
226
  with gr.Row(elem_classes="search-box"):
227
+ search_input = gr.Textbox(label="", placeholder="Ask anything (e.g., 'What are the latest AI trends in 2025?')", lines=2)
228
  search_btn = gr.Button("Research", variant="primary")
229
 
230
  # Results layout