Spaces:

AlignAI
/

Deep-Research-Arxiv

Running

App Files Files Community

GitsSaikat committed on Feb 12

Commit

832926c

0 Parent(s):

first commit

Browse files

Files changed (8) hide show

app.py +181 -0
banner.png +0 -0
logo.png +0 -0
logo_a.png +0 -0
requirements.txt +3 -0
research/__pycache__/arxiv_research.cpython-312.pyc +0 -0
research/__pycache__/research.cpython-312.pyc +0 -0
research/arxiv_research.py +221 -0

app.py ADDED Viewed

	@@ -0,0 +1,181 @@

+# app.py
+import streamlit as st
+from research import arxiv_research  # Import the corrected research module
+import asyncio
+from PIL import Image
+# Page configuration: browser-tab title and icon, wide layout, sidebar open by default.
+st.set_page_config(
+    page_title="Deep Research Arxiv",
+    page_icon="🔍",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Load images. Both paths are relative, so the files must be reachable from the
+# directory the app is launched in.
+logo = Image.open('logo_a.png')  # rendered at the top of the sidebar
+banner = Image.open('banner.png') # NOTE(review): loaded here but not referenced again in this script
+# Custom CSS (optional styling): rounds image corners and defines the
+# .api-container / .api-header classes.
+# NOTE(review): no markup emitted by this script uses .api-container or .api-header.
+st.markdown("""
+    <style>
+    .stImage > img {
+        border-radius: 10px;
+        margin-bottom: 2rem;
+    }
+    .api-container {
+        background-color: #f8f9fa;
+        padding: 1.5rem;
+        border-radius: 10px;
+        margin-bottom: 2rem;
+        border: 1px solid #e0e0e0;
+    }
+    .api-header {
+        color: #1E88E5;
+        font-size: 1.2rem;
+        margin-bottom: 1rem;
+    }
+    </style>
+    """, unsafe_allow_html=True)
+# Initialize the session flag that gates the research form: it stays False until
+# an OpenRouter key has been saved through the sidebar form.
+if 'api_keys_configured' not in st.session_state:
+    st.session_state.api_keys_configured = False
+# Sidebar configuration for OpenRouter API key
+with st.sidebar:
+    # NOTE(review): both width=200 and use_container_width=True are passed;
+    # confirm which of the two Streamlit actually honors here.
+    st.image(logo, width=200, use_container_width=True)
+    st.markdown("### ⚙️ API Configuration")
+    # The expander starts open only while no key has been configured yet.
+    with st.expander("Configure OpenRouter API Key", expanded=not st.session_state.api_keys_configured):
+        api_form = st.form("api_keys_form")
+        with api_form:
+            # Pre-fill with the previously saved key (empty string on first visit).
+            openrouter_key = api_form.text_input(
+                "OpenRouter API Key",
+                type="password",
+                value=st.session_state.get('openrouter_key', ''),
+                help="Required for language model access (literature review generation)."
+            )
+            if api_form.form_submit_button("Save API Key"):
+                # An empty submission is rejected and leaves the stored state untouched.
+                if not openrouter_key:
+                    st.error("❌ OpenRouter API key is required!")
+                else:
+                    # Persist the key and flip the gate flag for this session.
+                    st.session_state.openrouter_key = openrouter_key
+                    st.session_state.api_keys_configured = True
+                    # NOTE(review): st.rerun() follows immediately, so this message is
+                    # presumably replaced by the persistent one below - confirm.
+                    st.success("✅ OpenRouter API key saved successfully!")
+                    st.rerun()  # Re-run the app to update the state
+    # Persistent confirmation shown on every run once a key is stored.
+    if st.session_state.api_keys_configured:
+        st.success("✅ OpenRouter API Key configured")
+    st.markdown("### 🔑 Get API Key")
+    st.markdown("""
+        - [OpenRouter API Key](https://openrouter.ai/keys)
+    """)
+# Main content area of the app: title plus a two-column introduction.
+st.title("🔍 Deep Research Arxiv")
+col1, col2 = st.columns(2)
+# Left column: welcome card rendered as raw HTML with inline styles
+# (hence unsafe_allow_html=True).
+with col1:
+    st.markdown("""
+    <div style='background-color: #5dade2; padding: 1rem; border-radius: 10px; margin-bottom: 2rem;'>
+        <h4 style='color: #1565C0; margin-bottom: 0.5rem;'>Welcome to Deep Research Arxiv!</h4>
+        <p style='color: #424242;'>
+            This application helps you conduct literature reviews on Arxiv by:
+            <br>
+            • Searching Arxiv for relevant papers<br>
+            • Analyzing upto 100 papers based on query<br>
+            • Synthesizing a literature review with citations
+        </p>
+    </div>
+    """, unsafe_allow_html=True)
+# Right column: static usage tips shown in an info box.
+with col2:
+    st.markdown("### Tips for Better Results")
+    st.info("""
+        • Be specific in your query with clear and focused questions.
+        • Consider including relevant keywords.
+        • Don't choose too many papers; it might prompt the model to include irrelevant information if not enough relevant papers exist.
+    """)
+# Research form. The values it defines (user_query, paper_count, submitted) are
+# read by the result-handling code further down the script.
+with st.container():
+    # clear_on_submit=False keeps the entered query visible after submission.
+    with st.form("research_form", clear_on_submit=False):
+        st.markdown("### Research Parameters")
+        # The query box is disabled until an API key has been configured.
+        user_query = st.text_area(
+            "Research Query",
+            placeholder="Enter your research topic or question here...",
+            help="Be as specific as possible for better results (e.g., 'Quantum Machine Learning for Drug Discovery').",
+            height=100,
+            disabled=not st.session_state.api_keys_configured
+        )
+        # Paper count is limited to 10-100 in steps of 5, defaulting to 20.
+        # Unlike the other two widgets it is not disabled when no key is set.
+        paper_count = st.number_input(
+            "Number of papers to analyze",
+            min_value=10,
+            max_value=100,
+            value=20,
+            step=5,
+            help="Select number of papers to include (10-100)"
+        )
+        # The submit button is disabled under the same condition as the query box.
+        submitted = st.form_submit_button(
+            "🚀 Start Research",
+            disabled=not st.session_state.api_keys_configured
+        )
+        # Explain why the form is disabled when no key is stored yet.
+        if not st.session_state.api_keys_configured:
+            st.warning("⚠️ Please configure your OpenRouter API key in the sidebar to enable research.")
+# Function to run the research (using asyncio.run)
+def run_research(user_query, paper_count):
+    """Execute research with specified paper count."""
+    arxiv_research.OPENROUTER_API_KEY = st.session_state.openrouter_key
+    return asyncio.run(arxiv_research.research_flow(user_query, paper_count))
+# Handling form submission and displaying results
+# Runs only when the form was submitted and a key is configured.
+if submitted and st.session_state.api_keys_configured:
+    # Reject blank / whitespace-only queries before doing any network work.
+    if not user_query.strip():
+        st.error("⚠️ Please enter a research query.")
+    else:
+        with st.spinner(f"🔄 Analyzing {paper_count} papers from Arxiv..."):
+            try:
+                # Blocking call: returns whatever text research_flow produced.
+                # NOTE(review): that text is rendered and offered for download as-is;
+                # confirm whether failure messages returned by the research module
+                # should be treated differently from a real review.
+                literature_review = run_research(user_query, paper_count)
+                # Display results
+                # NOTE(review): the 'report-container' class is not defined in the CSS
+                # injected earlier in this script.
+                st.markdown("""
+                    <div class='report-container'>
+                        <h3 style='color: #1E88E5; margin-bottom: 1rem;'>📊 Literature Review</h3>
+                    </div>
+                """, unsafe_allow_html=True)
+                # The review body is rendered as Markdown with raw HTML disabled.
+                st.markdown(literature_review, unsafe_allow_html=False)
+                # Extract citation statistics: show everything after the last
+                # "Citation Statistics:" marker, if the text contains one.
+                if "Citation Statistics:" in literature_review:
+                    stats_section = literature_review.split("Citation Statistics:")[-1]
+                    st.info(f"📈 Citation Statistics:{stats_section}")
+                # Offer the full text (review, references, BibTeX) as a plain-text file.
+                st.download_button(
+                    label="📥 Download Literature Review",
+                    data=literature_review,
+                    file_name="nature_style_review.txt",
+                    mime="text/plain"
+                )
+            except Exception as e:
+                # Top-level UI boundary: surface any failure to the user instead of crashing.
+                st.error(f"❌ An error occurred: {e}")
+# Footer, rendered on every run regardless of form state.
+st.markdown("""
+    <div style='text-align: center; color: #666; padding: 2rem;'>
+        <p>Built by GItsSaikat ❤️</p>
+    </div>
+""", unsafe_allow_html=True)

banner.png ADDED Viewed

logo.png ADDED Viewed

logo_a.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+streamlit
+aiohttp
+nest_asyncio

research/__pycache__/arxiv_research.cpython-312.pyc ADDED Viewed

Binary file (9.64 kB). View file

research/__pycache__/research.cpython-312.pyc ADDED Viewed

Binary file (8.92 kB). View file

research/arxiv_research.py ADDED Viewed

	@@ -0,0 +1,221 @@

+# research/arxiv_research.py
+import asyncio
+import aiohttp
+import nest_asyncio
+import xml.etree.ElementTree as ET  # For parsing Arxiv XML response
+# Applied once at import time; presumably so app.py's asyncio.run() call works
+# even when an event loop is already running - TODO confirm.
+nest_asyncio.apply()
+# API Endpoints
+OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
+ARXIV_API_URL = "http://export.arxiv.org/api/query"
+# Global API Key. app.py overwrites this module attribute before each research
+# run; it is read when the OpenRouter request headers are built.
+OPENROUTER_API_KEY = ""
+# Model used by call_openrouter_async when the caller does not pass one.
+DEFAULT_MODEL = "google/gemini-2.0-flash-lite-preview-02-05:free"
+# NOTE(review): not referenced by any active code in this file.
+FIXED_PAPER_COUNT = 70
+async def call_openrouter_async(session, messages, model=DEFAULT_MODEL):
+    """
+    Make an asynchronous request to the OpenRouter chat completion API.
+    Returns the assistant's reply text.
+    """
+    headers = {
+        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+        "HTTP-Referer": "https://github.com/Pygen",
+        "X-Title": "Arxiv Literature Review Assistant",
+        "Content-Type": "application/json"
+    }
+    payload = {
+        "model": model,
+        "messages": messages,
+        "temperature": 0.7,
+        "max_tokens": 4096
+    }
+    try:
+        async with session.post(OPENROUTER_URL, headers=headers, json=payload) as resp:
+            if resp.status == 200:
+                result = await resp.json()
+                return result['choices'][0]['message']['content']
+            else:
+                text = await resp.text()
+                print(f"OpenRouter API error: {resp.status} - {text}")
+                return None
+    except Exception as e:
+        print("Error during OpenRouter call:", e)
+        return None
+async def search_arxiv_async(session, query, max_results=100):
+    """
+    Search Arxiv API (no API key needed) and return paper entries.
+    """
+    params = {
+        'search_query': query,
+        'start': 0,
+        'max_results': max_results,
+        'sortBy': 'relevance',
+        'sortOrder': 'descending'
+    }
+    paper_entries = []
+    try:
+        async with session.get(ARXIV_API_URL, params=params) as response:
+            if response.status == 200:
+                xml_content = await response.text()
+                root = ET.fromstring(xml_content)
+                namespace = {'atom': 'http://www.w3.org/2005/Atom'}
+                entries = root.findall('atom:entry', namespace)
+                for entry in entries:
+                    title_element = entry.find('atom:title', namespace)
+                    abstract_element = entry.find('atom:summary', namespace)
+                    url_element = entry.find('atom:id', namespace)
+                    authors_elements = entry.findall('atom:author/atom:name', namespace)
+                    published_element = entry.find('atom:published', namespace)  # Get publication date
+                    authors = [author.text for author in authors_elements] if authors_elements else ["N/A"]
+                    title = title_element.text.strip() if title_element is not None else "N/A"
+                    abstract = abstract_element.text.strip().replace('\n', ' ') if abstract_element is not None else "N/A"
+                    url = url_element.text.strip() if url_element is not None else "N/A"
+                    published = published_element.text.strip() if published_element is not None else "N/A"
+                    year = published[:4] if published else "N/A" #Extract the year.
+                    paper_entries.append({
+                        'title': title,
+                        'abstract': abstract,
+                        'url': url,
+                        'authors': ', '.join(authors),
+                        'year': year
+                    })
+            else:
+                print(f"Arxiv API error: {response.status}")
+                return []
+    except Exception as e:
+        print(f"Error during Arxiv API call: {e}")
+        return []
+    return paper_entries
+async def prepare_references(paper_entries):
+    """Prepare reference list from paper entries"""
+    references = []
+    for idx, paper in enumerate(paper_entries, 1):
+        references.append({
+            'citation_number': idx,
+            'authors': paper['authors'],
+            'title': paper['title'],
+            'year': paper['year'],
+            'url': paper['url'],
+            'abstract': paper['abstract'],
+            'citation_key': f"[{idx}]"
+        })
+    return references
+async def generate_bibtex_entry(ref):
+    """Generate BibTeX entry for a paper."""
+    arxiv_id = ref['url'].split('/')[-1]
+    bibtex = (
+        f"@article{{{arxiv_id},\n"
+        f"  author = {{{ref['authors']}}},\n"
+        f"  title = {{{ref['title']}}},\n"
+        f"  year = {{{ref['year']}}},\n"
+        f"  eprint = {{{arxiv_id}}},\n"
+        f"  archivePrefix = {{arXiv}},\n"
+        f"  primaryClass = {{cs.LG}},\n"  # You might want to make this dynamic
+        f"  url = {{{ref['url']}}}\n"
+        f"}}\n\n"  # Added an extra newline after the BibTeX entry
+    )
+    return bibtex
+async def generate_literature_review_async(session, user_query, paper_entries):
+    """
+    Generate literature review based on prepared references.
+    """
+    # First prepare all references
+    references = await prepare_references(paper_entries)
+    # Prepare paper information with citations
+    papers_info = []
+    for ref in references:
+        papers_info.append(
+            f"Paper {ref['citation_key']}:\n"
+            f"Title: {ref['title']}\n"
+            f"Abstract: {ref['abstract']}\n"
+            f"Citation: Use {ref['citation_key']} to cite this paper"
+        )
+    # Generate Nature-style review
+    review_prompt = (
+        "Write a comprehensive literature review in Nature journal style. "
+        "Requirements:\n"
+        "1. Use formal Nature journal style\n"
+        "2. Begin with a compelling introduction\n"
+        "3. Organize findings into clear themes\n"
+        "4. Use provided citation numbers [n] when discussing papers\n"
+        "5. Each paper must be cited at least once\n"
+        "6. Make connections between related papers\n"
+        "7. Conclude with future directions\n"
+        "7. Make sure the literature review is at least 6000 words if the {paper_count} are more than 70, and at least 4000 words when the {paper_count} are 40 to 70, andat least 2500 words when the {paper_count} are 10 to 39.\n"
+        "8. DO NOT include references - they will be added separately\n"
+        f"\nTopic: {user_query}\n\n"
+        f"Available Papers:\n\n{chr(10).join(papers_info)}"
+    )
+    messages = [
+        {"role": "system", "content": "You are a Nature journal editor writing a literature review."},
+        {"role": "user", "content": review_prompt}
+    ]
+    literature_review = await call_openrouter_async(session, messages)
+    if literature_review:
+        # Format references in Nature style with clickable links
+        refs_section = "\nReferences\n"
+        bibtex_section = "\nBibTeX Citations:\n\n"  # New section for BibTeX
+        for ref in references:
+            arxiv_id = ref['url'].split('/')[-1]
+            refs_section += (
+                f"{ref['citation_number']}. {ref['authors']}. "
+                f"{ref['title']}. "
+                f"arXiv:{arxiv_id} ({ref['year']}). "
+                f"Available at: {ref['url']}\n"
+            )
+            bibtex_section += await generate_bibtex_entry(ref)  # Generate BibTeX entry
+        # Add section separator
+        final_text = (
+            literature_review +
+            "\n" + "="*50 + "\n" +
+            refs_section +
+            "\n" + "="*50 + "\n" +  # Separator for BibTeX
+            bibtex_section
+        )
+        return final_text
+    return "Error generating literature review."
+async def research_flow(user_query, paper_count):
+    """
+    Execute research flow with user-specified paper count.
+    """
+    async with aiohttp.ClientSession() as session:
+        # Step 1: Get exact number of papers requested
+        paper_entries = await search_arxiv_async(session, user_query, max_results=paper_count)
+        if not paper_entries:
+            return "No relevant papers found. Please try a different query."
+        # Step 2: Generate review with prepared references
+        literature_review = await generate_literature_review_async(session, user_query, paper_entries[:paper_count])
+        return literature_review
+# def main():
+#     """CLI entry point."""
+#     user_query = input("Enter your research topic/question: ").strip()
+#     final_report = asyncio.run(research_flow(user_query, FIXED_PAPER_COUNT))
+#     print("\n==== LITERATURE REVIEW ====\n")
+#     print(final_report)
+# if __name__ == "__main__":
+#     main()