Omar ID EL MOUMEN committed on
Commit
ad66b01
·
1 Parent(s): 62eaf4f

Reworked app

Browse files
Files changed (4) hide show
  1. app.py +17 -21
  2. static/script.js +79 -55
  3. static/style.css +29 -0
  4. templates/index.html +5 -1
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import FastAPI
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from fastapi.responses import FileResponse
4
  from fastapi.staticfiles import StaticFiles
@@ -7,7 +7,6 @@ import xml.etree.ElementTree as xmlparser
7
  import requests
8
  from pydantic import BaseModel
9
  import sys
10
- import random
11
  import fitz
12
  import re,os,json
13
  from io import BytesIO
@@ -116,18 +115,13 @@ async def extract_arxiv_pdf(document: DocumentID):
116
  for title in titles:
117
  if title[0] == 1 or title[0] == 2:
118
  main_titles.append((title[0], title[1]))
119
- return {"pub_id": document.doc_id, "titles": [(t[0],re.sub(r"\s+", " ", remove_punctuations(remove_in_betweens(t[1]))).strip()) for t in main_titles], "text": postprocess_text, "error": False} if len(main_titles) > 0 else {"pub_id": document.doc_id, "titles": "No titles found !", "text": postprocess_text, "error": False}
120
  else:
121
  print("ID: " + document.doc_id)
122
  print("URL: " + f"http://arxiv.org/pdf/{document.doc_id}")
123
  print("Status code: " + str(pdf_req.status_code))
124
  return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}
125
 
126
- @app.post("/extract_pdf/arxiv_id/random")
127
- async def extract_random_arxiv_pdf(query: Query):
128
- pubs = await get_articles(query)
129
- return await extract_arxiv_pdf(random.choice(list(pubs["message"].keys())))
130
-
131
  @app.post("/extract_pdf/url")
132
  async def extract_pdf(pdf: WebPDF):
133
  pdf_req = requests.get(pdf.url)
@@ -152,12 +146,12 @@ def researcher(model, user, token):
152
  url = 'https://api.groq.com/openai/v1/chat/completions'
153
  headers = {
154
  'Content-Type': 'application/json',
155
- 'Authorization': f'Bearer {token}'
156
  }
157
  system_msg = {
158
  'role': 'system',
159
  'content': (
160
- 'You are an experience PhD professor with 20 years experience in research. You help the user build their research plan based on the following examples. build the plan according to the examples without further questions. provide the steps of the plan in a form of research requests to arxiv, nothing else:\n'
161
  '''<example>
162
  <search-request>
163
  Help me research recent AI-powered marketing campaigns to benchmark for 2025 planning
@@ -288,8 +282,8 @@ research how new business models beyond ROI could benefit societal impact of 6G
288
  (6) Find information on potential risks and challenges associated with new business models for 6G technology.
289
  </search-plan>
290
  </example>\n'''
291
- 'Optionally, do not precise the sources, as we search on every websites that we possibly can.'
292
- 'For the response format, send a JSON of this format : [{"step_index": The step number, "step_text": What we have to do, "keywords": The important keywords separated by spaces (no comma) (important: the keywords that we will use for search engines and APIs, so get rid of `research papers`, `articles`, ... keywords), `privilegie les abbreviations`}, ...] Take those for examples :\n'
293
  '''<example><search-request>Provide a plan for 6G challenges</search-request><search-plan>[
294
  {
295
  "step_index": 1,
@@ -372,21 +366,23 @@ research how new business models beyond ROI could benefit societal impact of 6G
372
  class GroqRequest(BaseModel):
373
  model: str
374
  user: str
375
- token: str
376
-
377
- class Plan(BaseModel):
378
- plan: dict
379
 
380
- @app.post("/get_plan")
381
- def get_research_plan(infos: GroqRequest):
382
- plan = researcher(infos.model, infos.user, infos.token)
 
 
 
 
383
  plan = json.loads(re.sub(r"\s+", " ", plan))
384
  return {"error": False, "plan": plan}
385
 
386
  @app.post("/search/plan/arxiv")
387
- async def get_articles_from_plan(infos: GroqRequest):
388
  plan_articles = []
389
- plan = get_research_plan(infos)["plan"]
 
 
390
  for step in plan:
391
  index, inst, kws = step.values()
392
  data = await get_articles(Query(keyword=kws, limit=5))
 
1
+ from fastapi import FastAPI, Header
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from fastapi.responses import FileResponse
4
  from fastapi.staticfiles import StaticFiles
 
7
  import requests
8
  from pydantic import BaseModel
9
  import sys
 
10
  import fitz
11
  import re,os,json
12
  from io import BytesIO
 
115
  for title in titles:
116
  if title[0] == 1 or title[0] == 2:
117
  main_titles.append((title[0], title[1]))
118
+ return {"pub_id": document.doc_id, "text": postprocess_text, "error": False} if len(main_titles) > 0 else {"pub_id": document.doc_id, "titles": "No titles found !", "text": postprocess_text, "error": False}
119
  else:
120
  print("ID: " + document.doc_id)
121
  print("URL: " + f"http://arxiv.org/pdf/{document.doc_id}")
122
  print("Status code: " + str(pdf_req.status_code))
123
  return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}
124
 
 
 
 
 
 
125
  @app.post("/extract_pdf/url")
126
  async def extract_pdf(pdf: WebPDF):
127
  pdf_req = requests.get(pdf.url)
 
146
  url = 'https://api.groq.com/openai/v1/chat/completions'
147
  headers = {
148
  'Content-Type': 'application/json',
149
+ 'Authorization': f'Bearer {token}',
150
  }
151
  system_msg = {
152
  'role': 'system',
153
  'content': (
154
+ 'You are an experience PhD professor with 20 years experience in research. You help the user build their research plan based on the following examples. build the plan according to the examples without further questions. provide the steps of the plan in a form of research requests to search engines of public document publisher or web searching purposes, nothing else:\n'
155
  '''<example>
156
  <search-request>
157
  Help me research recent AI-powered marketing campaigns to benchmark for 2025 planning
 
282
  (6) Find information on potential risks and challenges associated with new business models for 6G technology.
283
  </search-plan>
284
  </example>\n'''
285
+ 'Optionally, do not precise the sources, as we search on every websites that we possibly can. Take note that sometimes, the user will send you keywords only, just provide report of them.\n'
286
+ 'For the response format, you must send a JSON of this format : [{"step_index": The step number, "step_text": What we have to do, "keywords": The important keywords separated by spaces (no comma) (important: the keywords that we will use for search engines and APIs, so get rid of `research papers`, `articles`, ... keywords), `privilegie les abbreviations`}, ...] Take those for examples :\n'
287
  '''<example><search-request>Provide a plan for 6G challenges</search-request><search-plan>[
288
  {
289
  "step_index": 1,
 
366
  class GroqRequest(BaseModel):
367
  model: str
368
  user: str
 
 
 
 
369
 
370
@app.post("/search/plan")
async def get_research_plan(infos: GroqRequest, api_key: str = Header(None, alias="GROQ_TOKEN")):
    """Build a research plan for the user's request via the Groq chat API.

    Args:
        infos: Request body carrying the Groq ``model`` name and the ``user``
            prompt the plan is generated for.
        api_key: Groq API token read from the ``GROQ_TOKEN`` request header.

    Returns:
        ``{"error": False, "plan": <parsed JSON>}`` on success, otherwise
        ``{"error": True, "plan": "", "message": <reason>}``.
    """
    # An empty header value is as unusable as a missing one: the front end
    # sends the raw input field, which is "" when the user typed nothing.
    if not api_key:
        return {"error": True, "message": "Missing API key", "plan": ""}

    plan = researcher(infos.model, infos.user, api_key)
    # NOTE(review): researcher() presumably returns "" on failure - confirm.
    if not plan:
        return {"error": True, "plan": "", "message": "Error while generating the research plan"}

    try:
        # Collapse whitespace runs (including newlines) before parsing.
        plan = json.loads(re.sub(r"\s+", " ", plan))
    except (json.JSONDecodeError, TypeError):
        # The model's reply is not guaranteed to be valid JSON; report it in
        # the payload format the client already handles instead of a 500.
        return {"error": True, "plan": "", "message": "Error while generating the research plan"}
    return {"error": False, "plan": plan}
379
 
380
  @app.post("/search/plan/arxiv")
381
+ async def get_arxiv_research_plan(infos: GroqRequest, api_key: str = Header(None, alias="GROQ_TOKEN")):
382
  plan_articles = []
383
+ plan = get_research_plan(infos, api_key)["plan"]
384
+ if plan == "":
385
+ return {"error": True, "message": "Error while generating the research plan"}
386
  for step in plan:
387
  index, inst, kws = step.values()
388
  data = await get_articles(Query(keyword=kws, limit=5))
static/script.js CHANGED
@@ -1,14 +1,62 @@
1
  document.addEventListener('DOMContentLoaded', function() {
2
  const searchInput = document.getElementById('keyword-input');
 
3
  const limitInput = document.getElementById("limit-input");
4
  const searchButton = document.getElementById('search-button');
 
5
  const limitCheckbox = document.getElementById("limit-check");
6
  const popupText = document.querySelector(".scrollable-text");
7
  const popupTitle = document.getElementById("popup-title");
8
  const body = document.body;
9
  const resultsContainer = document.getElementById('results-container');
10
 
11
- // Function to perform the search
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  async function performSearch() {
13
  const keyword = searchInput.value.trim();
14
  let limit = limitInput.value.trim();
@@ -25,7 +73,7 @@ document.addEventListener('DOMContentLoaded', function() {
25
  resultsContainer.innerHTML = '<div class="loading">Searching for ' + limit.toString() + ' papers about "' + keyword + '"...</div>';
26
 
27
  try {
28
- const response = await fetch('https://om4r932-arxiv.hf.space/search', {
29
  method: 'POST',
30
  headers: {
31
  'Accept': 'application/json',
@@ -64,7 +112,32 @@ document.addEventListener('DOMContentLoaded', function() {
64
  }
65
  }
66
 
67
- // Function to display the results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  function displayResults(results) {
69
  resultsContainer.innerHTML = '';
70
 
@@ -93,7 +166,6 @@ document.addEventListener('DOMContentLoaded', function() {
93
  <div class="paper-id">ArXiv ID: ${id}</div>
94
  <a type="button" class="btn btn-primary" role="button" href="${paper.pdf}">Show PDF</a>
95
  <input type="button" value="Extract Text" doc="${id}" class="extractText btn btn-success"/>
96
- <input type="button" value="Get titles" doc="${id}" class="getTitle btn btn-danger"/>
97
  `;
98
  resultsContainer.appendChild(paperElement);
99
  });
@@ -103,7 +175,7 @@ document.addEventListener('DOMContentLoaded', function() {
103
  let id_doc = btn.getAttribute("doc");
104
  popupTitle.textContent = `PDF extraction - Document no ${id_doc}`;
105
  try {
106
- const response = await fetch('https://om4r932-arxiv.hf.space/extract_pdf/arxiv_id', {
107
  method: 'POST',
108
  headers: {
109
  'Accept': 'application/json',
@@ -124,6 +196,7 @@ document.addEventListener('DOMContentLoaded', function() {
124
  popupText.innerHTML = `<div class="error-message">Search error: ${data.message}</div>`;
125
  return;
126
  }
 
127
 
128
  popupText.textContent = data.text;
129
  } catch (error) {
@@ -134,56 +207,6 @@ document.addEventListener('DOMContentLoaded', function() {
134
  body.classList.add("no-scroll")
135
  });
136
  })
137
-
138
- document.querySelectorAll(".getTitle").forEach(btn => {
139
- btn.addEventListener("click", async function () {
140
- let id_doc = btn.getAttribute("doc");
141
- popupTitle.textContent = `Chapters - Document no ${id_doc}`;
142
- try {
143
- const response = await fetch('https://om4r932-arxiv.hf.space/extract_pdf/arxiv_id', {
144
- method: 'POST',
145
- headers: {
146
- 'Accept': 'application/json',
147
- 'Content-Type': 'application/json'
148
- },
149
- body: JSON.stringify({
150
- doc_id: id_doc
151
- })
152
- });
153
-
154
- if (!response.ok) {
155
- throw new Error(`HTTP error! status: ${response.status}`);
156
- }
157
-
158
- const data = await response.json();
159
-
160
- if (data.error) {
161
- popupText.innerHTML = `<div class="error-message">Search error: ${data.message}</div>`;
162
- return;
163
- }
164
- let t = "";
165
- if(Array.isArray(data.titles)){
166
- for(const [lvl, title] of data.titles){
167
- if(lvl == 1){
168
- t += title + "<br>"
169
- } else if (lvl == 2) {
170
- t += "<pre>" + title + "<br></pre>"
171
- } else {
172
- t += title + "<br>"
173
- }
174
- }
175
- popupText.innerHTML = t;
176
- } else {
177
- popupText.textContent = data.titles;
178
- }
179
- } catch (error) {
180
- console.error('Search error:', error);
181
- popupText.innerHTML = `<div class="error-message">Error performing search: ${error.message}</div>`;
182
- }
183
- document.getElementById("popup").style.display = "flex";
184
- body.classList.add("no-scroll")
185
- });
186
- })
187
  }
188
 
189
  // Add event listeners
@@ -200,6 +223,7 @@ document.addEventListener('DOMContentLoaded', function() {
200
  });
201
 
202
  searchButton.addEventListener('click', performSearch);
 
203
  limitCheckbox.addEventListener("change", handleCustomLimit);
204
  searchInput.addEventListener('keypress', (e) => {
205
  if (e.key === 'Enter') performSearch();
 
1
  document.addEventListener('DOMContentLoaded', function() {
2
  const searchInput = document.getElementById('keyword-input');
3
+ const apiKey = document.getElementById('apikey-input');
4
  const limitInput = document.getElementById("limit-input");
5
  const searchButton = document.getElementById('search-button');
6
+ const researchButton = document.getElementById('plan-button');
7
  const limitCheckbox = document.getElementById("limit-check");
8
  const popupText = document.querySelector(".scrollable-text");
9
  const popupTitle = document.getElementById("popup-title");
10
  const body = document.body;
11
  const resultsContainer = document.getElementById('results-container');
12
 
13
/**
 * Ask the backend (`POST /search/plan`) for a research plan about the text in
 * the keyword input and render it with `displayPlan`.
 *
 * The API key is sent in the `GROQ_TOKEN` request header. Validation problems
 * and request failures are shown inside the results container.
 */
async function performResearch() {
    // Render a status/error line as plain text so that neither the user's
    // input nor a server-provided message can inject markup into the page.
    const showMessage = (className, message) => {
        const box = document.createElement('div');
        box.className = className;
        box.textContent = message;
        resultsContainer.innerHTML = '';
        resultsContainer.appendChild(box);
    };

    const keyword = searchInput.value.trim();
    if (!keyword) {
        showMessage('error-message', 'Please enter a search keyword.');
        return;
    }

    // The plan endpoint rejects requests without a token; fail fast instead
    // of sending an empty header.
    const token = apiKey.value.trim();
    if (!token) {
        showMessage('error-message', 'Please enter your API key.');
        return;
    }

    // Show loading indicator
    showMessage('loading', 'Generating research plan about "' + keyword + '"...');

    try {
        const response = await fetch('/search/plan', {
            method: 'POST',
            headers: {
                'Accept': 'application/json',
                'Content-Type': 'application/json',
                'GROQ_TOKEN': token
            },
            body: JSON.stringify({
                model: 'llama-3.3-70b-versatile',
                user: keyword
            })
        });

        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }

        const data = await response.json();

        if (data.error) {
            showMessage('error-message', `Error: ${data.message}`);
            return;
        }

        displayPlan(data.plan);
    } catch (error) {
        console.error('Search error:', error);
        showMessage('error-message', `Error performing generation: ${error.message}`);
    }
}
59
+
60
  async function performSearch() {
61
  const keyword = searchInput.value.trim();
62
  let limit = limitInput.value.trim();
 
73
  resultsContainer.innerHTML = '<div class="loading">Searching for ' + limit.toString() + ' papers about "' + keyword + '"...</div>';
74
 
75
  try {
76
+ const response = await fetch('/search', {
77
  method: 'POST',
78
  headers: {
79
  'Accept': 'application/json',
 
112
  }
113
  }
114
 
115
/**
 * Render a research plan inside the results container.
 *
 * @param {Array<Object>|Object} results Plan steps; each step is read for its
 *     `step_index`, `step_text` and `keywords` properties.
 */
function displayPlan(results) {
    resultsContainer.innerHTML = '';

    // A string would otherwise be iterated character by character below, so
    // anything that is not a non-empty object/array is treated as "no plan".
    if (!results || typeof results !== 'object' || Object.keys(results).length === 0) {
        resultsContainer.innerHTML = '<div class="error-message">Try again !</div>';
        return;
    }

    const steps = Object.values(results);

    const resultsHeader = document.createElement('h2');
    resultsHeader.textContent = `Got ${steps.length} steps to follow`;
    resultsContainer.appendChild(resultsHeader);

    steps.forEach((step) => {
        const stepElement = document.createElement('div');
        stepElement.classList.add('paper-result');

        // Step content is model-generated text: insert it with textContent,
        // never as HTML, so it cannot inject markup or scripts.
        const rows = [
            ['paper-title', `Step no. ${step?.step_index}`],
            ['paper-abstract', `${step?.step_text}`],
            ['paper-id', `Keywords: ${step?.keywords}`]
        ];
        rows.forEach(([className, text]) => {
            const row = document.createElement('div');
            row.className = className;
            row.textContent = text;
            stepElement.appendChild(row);
        });

        resultsContainer.appendChild(stepElement);
    });
}
140
+
141
  function displayResults(results) {
142
  resultsContainer.innerHTML = '';
143
 
 
166
  <div class="paper-id">ArXiv ID: ${id}</div>
167
  <a type="button" class="btn btn-primary" role="button" href="${paper.pdf}">Show PDF</a>
168
  <input type="button" value="Extract Text" doc="${id}" class="extractText btn btn-success"/>
 
169
  `;
170
  resultsContainer.appendChild(paperElement);
171
  });
 
175
  let id_doc = btn.getAttribute("doc");
176
  popupTitle.textContent = `PDF extraction - Document no ${id_doc}`;
177
  try {
178
+ const response = await fetch('/extract_pdf/arxiv_id', {
179
  method: 'POST',
180
  headers: {
181
  'Accept': 'application/json',
 
196
  popupText.innerHTML = `<div class="error-message">Search error: ${data.message}</div>`;
197
  return;
198
  }
199
+ text = "";
200
 
201
  popupText.textContent = data.text;
202
  } catch (error) {
 
207
  body.classList.add("no-scroll")
208
  });
209
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  }
211
 
212
  // Add event listeners
 
223
  });
224
 
225
  searchButton.addEventListener('click', performSearch);
226
+ researchButton.addEventListener('click', performResearch);
227
  limitCheckbox.addEventListener("change", handleCustomLimit);
228
  searchInput.addEventListener('keypress', (e) => {
229
  if (e.key === 'Enter') performSearch();
static/style.css CHANGED
@@ -4,10 +4,25 @@ body {
4
  margin: 0 auto;
5
  padding: 20px;
6
  }
 
 
 
 
 
 
7
  #search-container {
8
  display: flex;
9
  margin-bottom: 20px;
10
  }
 
 
 
 
 
 
 
 
 
11
  #keyword-input {
12
  flex-grow: 1;
13
  padding: 10px;
@@ -24,6 +39,20 @@ body {
24
  cursor: pointer;
25
  font-size: 16px;
26
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  #search-button:hover {
28
  background-color: #45a049;
29
  }
 
4
  margin: 0 auto;
5
  padding: 20px;
6
  }
7
+
8
+ #api-container {
9
+ display: flex;
10
+ margin-bottom: 20px;
11
+ }
12
+
13
  #search-container {
14
  display: flex;
15
  margin-bottom: 20px;
16
  }
17
+
18
+ #apikey-input {
19
+ flex-grow: 1;
20
+ padding: 10px;
21
+ font-size: 16px;
22
+ border: 1px solid #ccc;
23
+ border-radius: 4px 0 0 4px;
24
+ }
25
+
26
  #keyword-input {
27
  flex-grow: 1;
28
  padding: 10px;
 
39
  cursor: pointer;
40
  font-size: 16px;
41
  }
42
+
43
+ #plan-button {
44
+ padding: 10px 20px;
45
+ background-color: #4C50AF;
46
+ color: white;
47
+ border: none;
48
+ border-radius: 0 4px 4px 0;
49
+ cursor: pointer;
50
+ font-size: 16px;
51
+ }
52
+
53
+ #plan-button:hover {
54
+ background-color: #3e4a9b;
55
+ }
56
  #search-button:hover {
57
  background-color: #45a049;
58
  }
templates/index.html CHANGED
@@ -6,17 +6,21 @@
6
  </head>
7
  <body>
8
  <h1>Search ArXiv Documents</h1>
 
 
 
9
  <div id="search-container">
10
  <input type="text" id="keyword-input" placeholder="Enter search keyword">
11
  <input type="number" min="1" id="limit-input" value="15" disabled placeholder="Enter limit of documents">
12
  <button id="search-button">Search</button>
 
13
  </div>
14
  <input type="checkbox" name="limit" id="limit-check"><label for="limit-check">Custom limit ?</label>
15
  <div id="results-container"></div>
16
  <div id="popup" class="popup">
17
  <div class="popup-content">
18
  <span class="close">&times;</span>
19
- <h2 id="popup-title">Fenêtre Popup</h2>
20
  <div class="scrollable-text">
21
 
22
  </div>
 
6
  </head>
7
  <body>
8
  <h1>Search ArXiv Documents</h1>
9
+ <div id="api-container">
10
+ <input type="password" id="apikey-input" placeholder="Enter API key (required)">
11
+ </div>
12
  <div id="search-container">
13
  <input type="text" id="keyword-input" placeholder="Enter search keyword">
14
  <input type="number" min="1" id="limit-input" value="15" disabled placeholder="Enter limit of documents">
15
  <button id="search-button">Search</button>
16
+ <button id="plan-button">Get Plan</button>
17
  </div>
18
  <input type="checkbox" name="limit" id="limit-check"><label for="limit-check">Custom limit ?</label>
19
  <div id="results-container"></div>
20
  <div id="popup" class="popup">
21
  <div class="popup-content">
22
  <span class="close">&times;</span>
23
+ <h2 id="popup-title"></h2>
24
  <div class="scrollable-text">
25
 
26
  </div>