Spaces:

OrganizedProgrammers
/

arXiv

Sleeping

App Files Community

Omar ID EL MOUMEN committed on Mar 31

Commit

ad66b01

1 Parent(s): 62eaf4f

Reworked app

Browse files

Files changed (4) hide show

app.py +17 -21
static/script.js +79 -55
static/style.css +29 -0
templates/index.html +5 -1

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
@@ -7,7 +7,6 @@ import xml.etree.ElementTree as xmlparser
 import requests
 from pydantic import BaseModel
 import sys
-import random
 import fitz
 import re,os,json
 from io import BytesIO
@@ -116,18 +115,13 @@ async def extract_arxiv_pdf(document: DocumentID):
             for title in titles:
                 if title[0] == 1 or title[0] == 2:
                     main_titles.append((title[0], title[1]))
-        return {"pub_id": document.doc_id, "titles": [(t[0],re.sub(r"\s+", " ", remove_punctuations(remove_in_betweens(t[1]))).strip()) for t in main_titles], "text": postprocess_text, "error": False} if len(main_titles) > 0 else {"pub_id": document.doc_id, "titles": "No titles found !", "text": postprocess_text, "error": False}
     else:
         print("ID: " + document.doc_id)
         print("URL: " + f"http://arxiv.org/pdf/{document.doc_id}")
         print("Status code: " + str(pdf_req.status_code))
         return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}
-@app.post("/extract_pdf/arxiv_id/random")
-async def extract_random_arxiv_pdf(query: Query):
-    pubs = await get_articles(query)
-    return await extract_arxiv_pdf(random.choice(list(pubs["message"].keys())))
 @app.post("/extract_pdf/url")
 async def extract_pdf(pdf: WebPDF):
     pdf_req = requests.get(pdf.url)
@@ -152,12 +146,12 @@ def researcher(model, user, token):
   url = 'https://api.groq.com/openai/v1/chat/completions'
   headers = {
         'Content-Type': 'application/json',
-        'Authorization': f'Bearer {token}'
   }
   system_msg = {
       'role': 'system',
       'content': (
-          'You are an experience PhD professor with 20 years experience in research. You help the user build their research plan based on the following examples. build the plan according to the examples without further questions. provide the steps of the plan in a form of research requests to arxiv, nothing else:\n'
           '''<example>
 <search-request>
 Help me research recent AI-powered marketing campaigns to benchmark for 2025 planning
@@ -288,8 +282,8 @@ research how new business models beyond ROI could benefit societal impact of 6G
 (6) Find information on potential risks and challenges associated with new business models for 6G technology.
 </search-plan>
 </example>\n'''
-    'Optionally, do not precise the sources, as we search on every websites that we possibly can.'
-    'For the response format, send a JSON of this format : [{"step_index": The step number, "step_text": What we have to do, "keywords": The important keywords separated by spaces (no comma) (important: the keywords that we will use for search engines and APIs, so get rid of `research papers`, `articles`, ... keywords), `privilegie les abbreviations`}, ...] Take those for examples :\n'
     '''<example><search-request>Provide a plan for 6G challenges</search-request><search-plan>[
     {
         "step_index": 1,
@@ -372,21 +366,23 @@ research how new business models beyond ROI could benefit societal impact of 6G
 class GroqRequest(BaseModel):
     model: str
     user: str
-    token: str
-class Plan(BaseModel):
-    plan: dict
-@app.post("/get_plan")
-def get_research_plan(infos: GroqRequest):
-    plan = researcher(infos.model, infos.user, infos.token)
     plan = json.loads(re.sub(r"\s+", " ", plan))
     return {"error": False, "plan": plan}
 @app.post("/search/plan/arxiv")
-async def get_articles_from_plan(infos: GroqRequest):
     plan_articles = []
-    plan = get_research_plan(infos)["plan"]
     for step in plan:
         index, inst, kws = step.values()
         data = await get_articles(Query(keyword=kws, limit=5))

+from fastapi import FastAPI, Header
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
 import requests
 from pydantic import BaseModel
 import sys
 import fitz
 import re,os,json
 from io import BytesIO
             for title in titles:
                 if title[0] == 1 or title[0] == 2:
                     main_titles.append((title[0], title[1]))
+        return {"pub_id": document.doc_id, "text": postprocess_text, "error": False} if len(main_titles) > 0 else {"pub_id": document.doc_id, "titles": "No titles found !", "text": postprocess_text, "error": False}
     else:
         print("ID: " + document.doc_id)
         print("URL: " + f"http://arxiv.org/pdf/{document.doc_id}")
         print("Status code: " + str(pdf_req.status_code))
         return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}
 @app.post("/extract_pdf/url")
 async def extract_pdf(pdf: WebPDF):
     pdf_req = requests.get(pdf.url)
   url = 'https://api.groq.com/openai/v1/chat/completions'
   headers = {
         'Content-Type': 'application/json',
+        'Authorization': f'Bearer {token}',
   }
   system_msg = {
       'role': 'system',
       'content': (
+          'You are an experience PhD professor with 20 years experience in research. You help the user build their research plan based on the following examples. build the plan according to the examples without further questions. provide the steps of the plan in a form of research requests to search engines of public document publisher or web searching purposes, nothing else:\n'
           '''<example>
 <search-request>
 Help me research recent AI-powered marketing campaigns to benchmark for 2025 planning
 (6) Find information on potential risks and challenges associated with new business models for 6G technology.
 </search-plan>
 </example>\n'''
+    'Optionally, do not precise the sources, as we search on every websites that we possibly can. Take note that sometimes, the user will send you keywords only, just provide report of them.\n'
+    'For the response format, you must send a JSON of this format : [{"step_index": The step number, "step_text": What we have to do, "keywords": The important keywords separated by spaces (no comma) (important: the keywords that we will use for search engines and APIs, so get rid of `research papers`, `articles`, ... keywords), `privilegie les abbreviations`}, ...] Take those for examples :\n'
     '''<example><search-request>Provide a plan for 6G challenges</search-request><search-plan>[
     {
         "step_index": 1,
 class GroqRequest(BaseModel):
     model: str
     user: str
+@app.post("/search/plan")
+async def get_research_plan(infos: GroqRequest, api_key: str = Header(None, alias="GROQ_TOKEN")):
+    if api_key is None:
+        return {"error": True, "message": "Missing API key", "plan": ""}
+    plan = researcher(infos.model, infos.user, api_key)
+    if plan == "":
+        return {"error": True, "plan": "", "message": "Error while generating the research plan"}
     plan = json.loads(re.sub(r"\s+", " ", plan))
     return {"error": False, "plan": plan}
 @app.post("/search/plan/arxiv")
+async def get_arxiv_research_plan(infos: GroqRequest, api_key: str = Header(None, alias="GROQ_TOKEN")):
     plan_articles = []
+    plan = get_research_plan(infos, api_key)["plan"]
+    if plan == "":
+        return {"error": True, "message": "Error while generating the research plan"}
     for step in plan:
         index, inst, kws = step.values()
         data = await get_articles(Query(keyword=kws, limit=5))

static/script.js CHANGED Viewed

@@ -1,14 +1,62 @@
 document.addEventListener('DOMContentLoaded', function() {
     const searchInput = document.getElementById('keyword-input');
     const limitInput = document.getElementById("limit-input");
     const searchButton = document.getElementById('search-button');
     const limitCheckbox = document.getElementById("limit-check");
     const popupText = document.querySelector(".scrollable-text");
     const popupTitle = document.getElementById("popup-title");
     const body = document.body;
     const resultsContainer = document.getElementById('results-container');
-    // Function to perform the search
     async function performSearch() {
         const keyword = searchInput.value.trim();
         let limit = limitInput.value.trim();
@@ -25,7 +73,7 @@ document.addEventListener('DOMContentLoaded', function() {
         resultsContainer.innerHTML = '<div class="loading">Searching for ' + limit.toString() + ' papers about "' + keyword + '"...</div>';
         try {
-            const response = await fetch('https://om4r932-arxiv.hf.space/search', {
                 method: 'POST',
                 headers: {
                     'Accept': 'application/json',
@@ -64,7 +112,32 @@ document.addEventListener('DOMContentLoaded', function() {
         }
     }
-    // Function to display the results
     function displayResults(results) {
         resultsContainer.innerHTML = '';
@@ -93,7 +166,6 @@ document.addEventListener('DOMContentLoaded', function() {
                 <div class="paper-id">ArXiv ID: ${id}</div>
                 <a type="button" class="btn btn-primary" role="button" href="${paper.pdf}">Show PDF</a>
                 <input type="button" value="Extract Text" doc="${id}" class="extractText btn btn-success"/>
-                <input type="button" value="Get titles" doc="${id}" class="getTitle btn btn-danger"/>
             `;
             resultsContainer.appendChild(paperElement);
         });
@@ -103,7 +175,7 @@ document.addEventListener('DOMContentLoaded', function() {
                 let id_doc = btn.getAttribute("doc");
                 popupTitle.textContent = `PDF extraction - Document no ${id_doc}`;
                 try {
-                    const response = await fetch('https://om4r932-arxiv.hf.space/extract_pdf/arxiv_id', {
                         method: 'POST',
                         headers: {
                             'Accept': 'application/json',
@@ -124,6 +196,7 @@ document.addEventListener('DOMContentLoaded', function() {
                         popupText.innerHTML = `<div class="error-message">Search error: ${data.message}</div>`;
                         return;
                     }
                     popupText.textContent = data.text;
                 } catch (error) {
@@ -134,56 +207,6 @@ document.addEventListener('DOMContentLoaded', function() {
                 body.classList.add("no-scroll")
             });
         })
-        document.querySelectorAll(".getTitle").forEach(btn => {
-            btn.addEventListener("click", async function () {
-                let id_doc = btn.getAttribute("doc");
-                popupTitle.textContent = `Chapters - Document no ${id_doc}`;
-                try {
-                    const response = await fetch('https://om4r932-arxiv.hf.space/extract_pdf/arxiv_id', {
-                        method: 'POST',
-                        headers: {
-                            'Accept': 'application/json',
-                            'Content-Type': 'application/json'
-                        },
-                        body: JSON.stringify({
-                            doc_id: id_doc
-                        })
-                    });
-                    if (!response.ok) {
-                        throw new Error(`HTTP error! status: ${response.status}`);
-                    }
-                    const data = await response.json();
-                    if (data.error) {
-                        popupText.innerHTML = `<div class="error-message">Search error: ${data.message}</div>`;
-                        return;
-                    }
-                    let t = "";
-                    if(Array.isArray(data.titles)){
-                        for(const [lvl, title] of data.titles){
-                            if(lvl == 1){
-                                t += title + "<br>"
-                            } else if (lvl == 2) {
-                                t += "<pre>" + title + "<br></pre>"
-                            } else {
-                                t += title + "<br>"
-                            }
-                        }
-                        popupText.innerHTML = t;
-                    } else {
-                        popupText.textContent = data.titles;
-                    }
-                } catch (error) {
-                    console.error('Search error:', error);
-                    popupText.innerHTML = `<div class="error-message">Error performing search: ${error.message}</div>`;
-                }
-                document.getElementById("popup").style.display = "flex";
-                body.classList.add("no-scroll")
-            });
-        })
     }
     // Add event listeners
@@ -200,6 +223,7 @@ document.addEventListener('DOMContentLoaded', function() {
     });
     searchButton.addEventListener('click', performSearch);
     limitCheckbox.addEventListener("change", handleCustomLimit);
     searchInput.addEventListener('keypress', (e) => {
         if (e.key === 'Enter') performSearch();

 document.addEventListener('DOMContentLoaded', function() {
     const searchInput = document.getElementById('keyword-input');
+    const apiKey = document.getElementById('apikey-input');
     const limitInput = document.getElementById("limit-input");
     const searchButton = document.getElementById('search-button');
+    const researchButton = document.getElementById('plan-button');
     const limitCheckbox = document.getElementById("limit-check");
     const popupText = document.querySelector(".scrollable-text");
     const popupTitle = document.getElementById("popup-title");
     const body = document.body;
     const resultsContainer = document.getElementById('results-container');
+    async function performResearch() {
+        const keyword = searchInput.value.trim();
+        let limit = limitInput.value.trim();
+        if (!keyword) {
+            resultsContainer.innerHTML = '<div class="error-message">Please enter a search keyword.</div>';
+            return;
+        }
+        if (!limit){
+            limit = 15;
+        }
+        // Show loading indicator
+        resultsContainer.innerHTML = '<div class="loading">Generating research plan about "' + keyword + '"...</div>';
+        try {
+            const response = await fetch('/search/plan', {
+                method: 'POST',
+                headers: {
+                    'Accept': 'application/json',
+                    'Content-Type': 'application/json',
+                    "GROQ_TOKEN": apiKey.value
+                },
+                body: JSON.stringify({
+                    model: "llama-3.3-70b-versatile",
+                    user: keyword,
+                })
+            });
+            if (!response.ok) {
+                throw new Error(`HTTP error! status: ${response.status}`);
+            }
+            const data = await response.json();
+            if (data.error) {
+                resultsContainer.innerHTML = `<div class="error-message">Error: ${data.message}</div>`;
+                return;
+            }
+            displayPlan(data.plan);
+        } catch (error) {
+            console.error('Search error:', error);
+            resultsContainer.innerHTML = `<div class="error-message">Error performing generation: ${error.message}</div>`;
+        }
+    }
     async function performSearch() {
         const keyword = searchInput.value.trim();
         let limit = limitInput.value.trim();
         resultsContainer.innerHTML = '<div class="loading">Searching for ' + limit.toString() + ' papers about "' + keyword + '"...</div>';
         try {
+            const response = await fetch('/search', {
                 method: 'POST',
                 headers: {
                     'Accept': 'application/json',
         }
     }
+    function displayPlan(results) {
+        console.log(results);
+        resultsContainer.innerHTML = '';
+        if (!results || Object.keys(results).length === 0) {
+            resultsContainer.innerHTML = '<div class="error-message">Try again !</div>';
+            return;
+        }
+        const resultsCount = Object.keys(results).length;
+        const resultsHeader = document.createElement('h2');
+        resultsHeader.textContent = `Got ${resultsCount} steps to follow`;
+        resultsContainer.appendChild(resultsHeader);
+        Object.entries(results).forEach(([id, step]) => {
+            const stepElement = document.createElement('div');
+            stepElement.classList.add('paper-result');
+            stepElement.innerHTML = `
+                <div class="paper-title">Step no. ${step.step_index}</div>
+                <div class="paper-abstract">${step.step_text}</div>
+                <div class="paper-id">Keywords: ${step.keywords}</div>
+                `;
+            resultsContainer.appendChild(stepElement);
+        })
+    }
     function displayResults(results) {
         resultsContainer.innerHTML = '';
                 <div class="paper-id">ArXiv ID: ${id}</div>
                 <a type="button" class="btn btn-primary" role="button" href="${paper.pdf}">Show PDF</a>
                 <input type="button" value="Extract Text" doc="${id}" class="extractText btn btn-success"/>
             `;
             resultsContainer.appendChild(paperElement);
         });
                 let id_doc = btn.getAttribute("doc");
                 popupTitle.textContent = `PDF extraction - Document no ${id_doc}`;
                 try {
+                    const response = await fetch('/extract_pdf/arxiv_id', {
                         method: 'POST',
                         headers: {
                             'Accept': 'application/json',
                         popupText.innerHTML = `<div class="error-message">Search error: ${data.message}</div>`;
                         return;
                     }
+                    text = "";
                     popupText.textContent = data.text;
                 } catch (error) {
                 body.classList.add("no-scroll")
             });
         })
     }
     // Add event listeners
     });
     searchButton.addEventListener('click', performSearch);
+    researchButton.addEventListener('click', performResearch);
     limitCheckbox.addEventListener("change", handleCustomLimit);
     searchInput.addEventListener('keypress', (e) => {
         if (e.key === 'Enter') performSearch();

static/style.css CHANGED Viewed

@@ -4,10 +4,25 @@ body {
     margin: 0 auto;
     padding: 20px;
 }
 #search-container {
     display: flex;
     margin-bottom: 20px;
 }
 #keyword-input {
     flex-grow: 1;
     padding: 10px;
@@ -24,6 +39,20 @@ body {
     cursor: pointer;
     font-size: 16px;
 }
 #search-button:hover {
     background-color: #45a049;
 }

     margin: 0 auto;
     padding: 20px;
 }
+#api-container {
+    display: flex;
+    margin-bottom: 20px;
+}
 #search-container {
     display: flex;
     margin-bottom: 20px;
 }
+#apikey-input {
+    flex-grow: 1;
+    padding: 10px;
+    font-size: 16px;
+    border: 1px solid #ccc;
+    border-radius: 4px 0 0 4px;
+}
 #keyword-input {
     flex-grow: 1;
     padding: 10px;
     cursor: pointer;
     font-size: 16px;
 }
+#plan-button {
+    padding: 10px 20px;
+    background-color: #4C50AF;
+    color: white;
+    border: none;
+    border-radius: 0 4px 4px 0;
+    cursor: pointer;
+    font-size: 16px;
+}
+#plan-button:hover {
+    background-color: #3e4a9b;
+}
 #search-button:hover {
     background-color: #45a049;
 }

templates/index.html CHANGED Viewed

@@ -6,17 +6,21 @@
 </head>
 <body>
     <h1>Search ArXiv Documents</h1>
     <div id="search-container">
         <input type="text" id="keyword-input" placeholder="Enter search keyword">
         <input type="number" min="1" id="limit-input" value="15" disabled placeholder="Enter limit of documents">
         <button id="search-button">Search</button>
     </div>
     <input type="checkbox" name="limit" id="limit-check"><label for="limit-check">Custom limit ?</label>
     <div id="results-container"></div>
     <div id="popup" class="popup">
         <div class="popup-content">
             <span class="close">&times;</span>
-            <h2 id="popup-title">Fenêtre Popup</h2>
             <div class="scrollable-text">
             </div>

 </head>
 <body>
     <h1>Search ArXiv Documents</h1>
+    <div id="api-container">
+        <input type="password" id="apikey-input" placeholder="Enter API key (required)">
+    </div>
     <div id="search-container">
         <input type="text" id="keyword-input" placeholder="Enter search keyword">
         <input type="number" min="1" id="limit-input" value="15" disabled placeholder="Enter limit of documents">
         <button id="search-button">Search</button>
+        <button id="plan-button">Get Plan</button>
     </div>
     <input type="checkbox" name="limit" id="limit-check"><label for="limit-check">Custom limit ?</label>
     <div id="results-container"></div>
     <div id="popup" class="popup">
         <div class="popup-content">
             <span class="close">&times;</span>
+            <h2 id="popup-title"></h2>
             <div class="scrollable-text">
             </div>