GitsSaikat committed on
Commit
832926c
·
0 Parent(s):

first commit

Browse files
app.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import streamlit as st
3
+ from research import arxiv_research # Import the corrected research module
4
+ import asyncio
5
+ from PIL import Image
6
+
7
+ # Page configuration
8
# Global page settings; Streamlit requires this to be the first st.* call.
_page_settings = {
    "page_title": "Deep Research Arxiv",
    "page_icon": "🔍",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)

# Static image assets, resolved relative to the working directory.
# `banner` is loaded but not referenced anywhere in the visible script.
logo = Image.open('logo_a.png')  # replace with your logo
banner = Image.open('banner.png')  # replace with your banner

# Optional cosmetic CSS injected into the page as raw HTML.
_custom_css = """
<style>
.stImage > img {
border-radius: 10px;
margin-bottom: 2rem;
}
.api-container {
background-color: #f8f9fa;
padding: 1.5rem;
border-radius: 10px;
margin-bottom: 2rem;
border: 1px solid #e0e0e0;
}
.api-header {
color: #1E88E5;
font-size: 1.2rem;
margin-bottom: 1rem;
}
</style>
"""
st.markdown(_custom_css, unsafe_allow_html=True)

# First run of a session: no API key has been saved yet.
if 'api_keys_configured' not in st.session_state:
    st.session_state.api_keys_configured = False
45
+
46
+ # Sidebar configuration for OpenRouter API key
47
with st.sidebar:
    # The original call passed both width=200 and use_container_width=True.
    # Per the Streamlit docs the container-width flag overrides `width`, so
    # the fixed width was dead; only the effective option is kept.
    st.image(logo, use_container_width=True)
    st.markdown("### ⚙️ API Configuration")

    # Keep the expander open until a key has been saved.
    with st.expander("Configure OpenRouter API Key", expanded=not st.session_state.api_keys_configured):
        api_form = st.form("api_keys_form")
        with api_form:
            openrouter_key = api_form.text_input(
                "OpenRouter API Key",
                type="password",
                value=st.session_state.get('openrouter_key', ''),
                help="Required for language model access (literature review generation)."
            )

            if api_form.form_submit_button("Save API Key"):
                # Pasted keys often carry stray whitespace/newlines, which
                # would end up verbatim in the Authorization header; a
                # whitespace-only value must also count as "missing".
                cleaned_key = openrouter_key.strip()
                if not cleaned_key:
                    st.error("❌ OpenRouter API key is required!")
                else:
                    st.session_state.openrouter_key = cleaned_key
                    st.session_state.api_keys_configured = True
                    st.success("✅ OpenRouter API key saved successfully!")
                    st.rerun()  # Re-run the app to update the state

    if st.session_state.api_keys_configured:
        st.success("✅ OpenRouter API Key configured")

    st.markdown("### 🔑 Get API Key")
    st.markdown("""
    - [OpenRouter API Key](https://openrouter.ai/keys)
    """)
77
+
78
+ # Main content area of the app
79
# Page heading followed by a two-column intro: welcome card on the left,
# query-writing tips on the right.
st.title("🔍 Deep Research Arxiv")
col1, col2 = st.columns(2)
with col1:
    # Raw HTML card; "up to" fixes the "upto" typo in the user-facing copy.
    st.markdown("""
    <div style='background-color: #5dade2; padding: 1rem; border-radius: 10px; margin-bottom: 2rem;'>
    <h4 style='color: #1565C0; margin-bottom: 0.5rem;'>Welcome to Deep Research Arxiv!</h4>
    <p style='color: #424242;'>
    This application helps you conduct literature reviews on Arxiv by:
    <br>
    • Searching Arxiv for relevant papers<br>
    • Analyzing up to 100 papers based on query<br>
    • Synthesizing a literature review with citations
    </p>
    </div>
    """, unsafe_allow_html=True)
with col2:
    st.markdown("### Tips for Better Results")
    st.info("""

    • Be specific in your query with clear and focused questions.

    • Consider including relevant keywords.

    • Don't choose too many papers; it might prompt the model to include irrelevant information if not enough relevant papers exist.
    """)
104
+
105
# The query box and submit button stay disabled until an API key is saved.
_inputs_locked = not st.session_state.api_keys_configured

with st.container(), st.form("research_form", clear_on_submit=False):
    st.markdown("### Research Parameters")

    # Free-text research topic/question.
    user_query = st.text_area(
        "Research Query",
        placeholder="Enter your research topic or question here...",
        help="Be as specific as possible for better results (e.g., 'Quantum Machine Learning for Drug Discovery').",
        height=100,
        disabled=_inputs_locked,
    )

    # How many arXiv results to request and feed to the model.
    paper_count = st.number_input(
        "Number of papers to analyze",
        min_value=10,
        max_value=100,
        value=20,
        step=5,
        help="Select number of papers to include (10-100)",
    )

    submitted = st.form_submit_button(
        "🚀 Start Research",
        disabled=_inputs_locked,
    )

if _inputs_locked:
    st.warning("⚠️ Please configure your OpenRouter API key in the sidebar to enable research.")
133
+
134
+
135
+
136
+
137
+ # Function to run the research (using asyncio.run)
138
def run_research(user_query, paper_count):
    """Run the async research pipeline to completion and return its text.

    Copies the key saved in ``st.session_state`` onto the
    ``arxiv_research`` module (a module-level global that the request
    helper reads) and then drives ``research_flow`` with ``asyncio.run``.

    NOTE(review): the key is stored on the module rather than passed as an
    argument, so it is presumably visible to every session served by this
    process -- confirm whether that is acceptable for the deployment.
    """
    arxiv_research.OPENROUTER_API_KEY = st.session_state.openrouter_key
    pipeline = arxiv_research.research_flow(user_query, paper_count)
    return asyncio.run(pipeline)
142
+
143
+
144
+ # Handling form submission and displaying results
145
# The research module reports failures by *returning* these messages rather
# than raising, so they must be recognised here; otherwise they would be
# rendered as a finished review and offered for download.
_RESEARCH_FAILURE_MESSAGES = (
    "No relevant papers found. Please try a different query.",
    "Error generating literature review.",
)

if submitted and st.session_state.api_keys_configured:
    if not user_query.strip():
        st.error("⚠️ Please enter a research query.")
    else:
        with st.spinner(f"🔄 Analyzing {paper_count} papers from Arxiv..."):
            try:
                literature_review = run_research(user_query, paper_count)

                if not literature_review or literature_review in _RESEARCH_FAILURE_MESSAGES:
                    # Surface pipeline failures as errors, not as results.
                    st.error(f"❌ {literature_review or 'No literature review was generated.'}")
                else:
                    # Display results
                    st.markdown("""
                    <div class='report-container'>
                    <h3 style='color: #1E88E5; margin-bottom: 1rem;'>📊 Literature Review</h3>
                    </div>
                    """, unsafe_allow_html=True)

                    # Model output is rendered as plain markdown (no raw HTML).
                    st.markdown(literature_review, unsafe_allow_html=False)

                    # Show the trailing statistics section, if the review has one.
                    if "Citation Statistics:" in literature_review:
                        stats_section = literature_review.split("Citation Statistics:")[-1]
                        st.info(f"📈 Citation Statistics:{stats_section}")

                    st.download_button(
                        label="📥 Download Literature Review",
                        data=literature_review,
                        file_name="nature_style_review.txt",
                        mime="text/plain"
                    )

            except Exception as e:
                # Top-level UI boundary: report anything unexpected to the user.
                st.error(f"❌ An error occurred: {e}")

# Page footer.
st.markdown("""
<div style='text-align: center; color: #666; padding: 2rem;'>
<p>Built by GItsSaikat ❤️</p>
</div>
""", unsafe_allow_html=True)
banner.png ADDED
logo.png ADDED
logo_a.png ADDED
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ streamlit
2
+ aiohttp
3
+ nest_asyncio
research/__pycache__/arxiv_research.cpython-312.pyc ADDED
Binary file (9.64 kB). View file
 
research/__pycache__/research.cpython-312.pyc ADDED
Binary file (8.92 kB). View file
 
research/arxiv_research.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # research/arxiv_research.py
2
+ import asyncio
3
+ import aiohttp
4
+ import nest_asyncio
5
+ import xml.etree.ElementTree as ET # For parsing Arxiv XML response
6
+ nest_asyncio.apply()
7
+
8
# API endpoints.
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
# Use TLS for the arXiv export API so queries are not sent in plaintext.
ARXIV_API_URL = "https://export.arxiv.org/api/query"

# Global API key; the Streamlit front end (app.py) assigns it before each run.
OPENROUTER_API_KEY = ""
# Model used when a caller does not pass one explicitly.
DEFAULT_MODEL = "google/gemini-2.0-flash-lite-preview-02-05:free"

# NOTE(review): not referenced anywhere in the visible source; kept in case
# other modules import it.
FIXED_PAPER_COUNT = 70
17
async def call_openrouter_async(session, messages, model=DEFAULT_MODEL):
    """Send a chat-completion request to OpenRouter and return the reply text.

    Args:
        session: An object exposing ``post(url, headers=..., json=...)`` as an
            async context manager (the caller passes an aiohttp session).
        messages: Chat messages, sent unchanged as the ``messages`` field.
        model: Model identifier; defaults to ``DEFAULT_MODEL``.

    Returns:
        The assistant message content, or ``None`` when the request fails,
        the status is not 200, or the response body has no usable choice.
        Failures are printed rather than raised (best-effort contract).
    """
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": "https://github.com/Pygen",
        "X-Title": "Arxiv Literature Review Assistant",
        "Content-Type": "application/json"
    }

    payload = {
        "model": model,
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 4096
    }

    try:
        async with session.post(OPENROUTER_URL, headers=headers, json=payload) as resp:
            if resp.status != 200:
                text = await resp.text()
                print(f"OpenRouter API error: {resp.status} - {text}")
                return None
            result = await resp.json()
    except Exception as e:
        # Network/decoding problems: keep the original best-effort behaviour.
        print("Error during OpenRouter call:", e)
        return None

    # A 200 response can still carry an error object instead of `choices`.
    # Report the payload itself so the cause is visible, instead of the
    # opaque "'choices'" KeyError message the broad handler used to print.
    try:
        return result['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError):
        print(f"OpenRouter API returned an unexpected payload: {result}")
        return None
48
+
49
def _element_text(element, default="N/A"):
    """Return an XML element's text with whitespace collapsed, else *default*."""
    if element is None or element.text is None:
        return default
    # Feed titles/abstracts are hard-wrapped; collapse newlines and runs of
    # spaces into single spaces.
    return " ".join(element.text.split()) or default


def _parse_arxiv_entry(entry, namespace):
    """Convert one Atom ``<entry>`` element into a paper dictionary."""
    author_names = [
        " ".join(node.text.split())
        for node in entry.findall('atom:author/atom:name', namespace)
        if node.text and node.text.strip()
    ]
    published = _element_text(entry.find('atom:published', namespace))
    return {
        'title': _element_text(entry.find('atom:title', namespace)),
        'abstract': _element_text(entry.find('atom:summary', namespace)),
        'url': _element_text(entry.find('atom:id', namespace)),
        'authors': ', '.join(author_names or ["N/A"]),
        # The first four characters of the published timestamp are the year.
        'year': published[:4] if published != "N/A" else "N/A",
    }


async def search_arxiv_async(session, query, max_results=100):
    """Search the arXiv API (no API key needed) and return paper entries.

    Args:
        session: An object exposing ``get(url, params=...)`` as an async
            context manager (the caller passes an aiohttp session).
        query: Value sent as the ``search_query`` parameter.
        max_results: Maximum number of entries to request.

    Returns:
        A list of dicts with the keys ``title``, ``abstract``, ``url``,
        ``authors`` (comma-separated string) and ``year``. Missing or empty
        fields become ``"N/A"``. An empty list is returned on any HTTP,
        network or XML-parsing failure; the failure is printed, not raised.
    """
    params = {
        'search_query': query,
        'start': 0,
        'max_results': max_results,
        'sortBy': 'relevance',
        'sortOrder': 'descending'
    }
    namespace = {'atom': 'http://www.w3.org/2005/Atom'}

    try:
        async with session.get(ARXIV_API_URL, params=params) as response:
            if response.status != 200:
                print(f"Arxiv API error: {response.status}")
                return []
            xml_content = await response.text()
        root = ET.fromstring(xml_content)
    except Exception as e:
        # Best-effort contract: callers treat [] as "nothing found".
        print(f"Error during Arxiv API call: {e}")
        return []

    # Per-entry parsing tolerates empty elements, so a single malformed entry
    # no longer discards the whole result set.
    return [
        _parse_arxiv_entry(entry, namespace)
        for entry in root.findall('atom:entry', namespace)
    ]
97
+
98
async def prepare_references(paper_entries):
    """Build the numbered reference list for a sequence of paper dicts.

    Each input dict must provide ``authors``, ``title``, ``year``, ``url``
    and ``abstract``. Entries are numbered from 1 in input order, and each
    result also carries the bracketed ``citation_key`` (e.g. ``"[3]"``).
    """
    return [
        {
            'citation_number': number,
            'authors': entry['authors'],
            'title': entry['title'],
            'year': entry['year'],
            'url': entry['url'],
            'abstract': entry['abstract'],
            'citation_key': f"[{number}]",
        }
        for number, entry in enumerate(paper_entries, start=1)
    ]
112
+
113
async def generate_bibtex_entry(ref):
    """Generate a BibTeX ``@article`` entry for one reference dict.

    Args:
        ref: Mapping with ``url``, ``authors`` (comma-separated names),
            ``title`` and ``year``. An optional ``primary_class`` overrides
            the arXiv category written to ``primaryClass``.

    Returns:
        The BibTeX entry as a string, terminated by a blank line.
    """
    url = ref['url']
    # The identifier is everything after "/abs/". Old-style ids such as
    # "hep-th/9901001" contain a slash, so taking only the last path segment
    # would drop the archive prefix.
    _, marker, tail = url.partition('/abs/')
    arxiv_id = tail if marker and tail else url.split('/')[-1]
    # Keep the citation key free of slashes.
    cite_key = arxiv_id.replace('/', '_')

    # BibTeX separates authors with " and "; a comma-joined list would be
    # parsed as a single "Last, First" name.
    names = [name.strip() for name in ref['authors'].split(',') if name.strip()]
    authors = ' and '.join(names) if names else ref['authors']

    # NOTE(review): "cs.LG" is the legacy hard-coded default and is only
    # correct for machine-learning papers; pass `primary_class` when known.
    primary_class = ref.get('primary_class', 'cs.LG')

    bibtex = (
        f"@article{{{cite_key},\n"
        f" author = {{{authors}}},\n"
        f" title = {{{ref['title']}}},\n"
        f" year = {{{ref['year']}}},\n"
        f" eprint = {{{arxiv_id}}},\n"
        f" archivePrefix = {{arXiv}},\n"
        f" primaryClass = {{{primary_class}}},\n"
        f" url = {{{ref['url']}}}\n"
        f"}}\n\n"  # Extra newline separates consecutive entries
    )
    return bibtex
128
+
129
async def generate_literature_review_async(session, user_query, paper_entries):
    """Generate a literature review with references and BibTeX appended.

    Args:
        session: HTTP session forwarded to ``call_openrouter_async``.
        user_query: The research topic placed in the prompt.
        paper_entries: Paper dicts as produced by ``search_arxiv_async``.

    Returns:
        The model's review followed by a numbered reference list and a
        BibTeX section, or the string ``"Error generating literature
        review."`` when the model call yields nothing.
    """
    # First prepare all references
    references = await prepare_references(paper_entries)

    # Prepare paper information with citations
    papers_info = [
        (
            f"Paper {ref['citation_key']}:\n"
            f"Title: {ref['title']}\n"
            f"Abstract: {ref['abstract']}\n"
            f"Citation: Use {ref['citation_key']} to cite this paper"
        )
        for ref in references
    ]

    # The original prompt contained a literal "{paper_count}" placeholder (the
    # string was not an f-string and no such variable exists here). Resolve
    # the length target up front so the model gets one concrete instruction.
    paper_count = len(references)
    if paper_count > 70:
        min_words = 6000
    elif paper_count >= 40:
        min_words = 4000
    else:
        min_words = 2500

    # Generate Nature-style review
    review_prompt = (
        "Write a comprehensive literature review in Nature journal style. "
        "Requirements:\n"
        "1. Use formal Nature journal style\n"
        "2. Begin with a compelling introduction\n"
        "3. Organize findings into clear themes\n"
        "4. Use provided citation numbers [n] when discussing papers\n"
        "5. Each paper must be cited at least once\n"
        "6. Make connections between related papers\n"
        "7. Conclude with future directions\n"
        f"8. Make sure the literature review is at least {min_words} words; "
        f"{paper_count} papers are provided\n"
        "9. DO NOT include references - they will be added separately\n"
        f"\nTopic: {user_query}\n\n"
        f"Available Papers:\n\n{chr(10).join(papers_info)}"
    )

    messages = [
        {"role": "system", "content": "You are a Nature journal editor writing a literature review."},
        {"role": "user", "content": review_prompt}
    ]

    literature_review = await call_openrouter_async(session, messages)

    if not literature_review:
        return "Error generating literature review."

    # Format references in Nature style, plus a parallel BibTeX section.
    reference_lines = []
    bibtex_entries = []
    for ref in references:
        arxiv_id = ref['url'].split('/')[-1]
        reference_lines.append(
            f"{ref['citation_number']}. {ref['authors']}. "
            f"{ref['title']}. "
            f"arXiv:{arxiv_id} ({ref['year']}). "
            f"Available at: {ref['url']}\n"
        )
        bibtex_entries.append(await generate_bibtex_entry(ref))

    refs_section = "\nReferences\n" + "".join(reference_lines)
    bibtex_section = "\nBibTeX Citations:\n\n" + "".join(bibtex_entries)
    separator = "\n" + "=" * 50 + "\n"

    return literature_review + separator + refs_section + separator + bibtex_section
197
+
198
async def research_flow(user_query, paper_count):
    """Search arXiv for *user_query* and return the generated review text.

    Opens one aiohttp session for both steps. Requests at most
    ``paper_count`` papers; when the search yields nothing, a fixed
    "no relevant papers" message is returned instead of a review.
    """
    async with aiohttp.ClientSession() as http:
        # Step 1: fetch the requested number of papers.
        found = await search_arxiv_async(http, user_query, max_results=paper_count)

        # Step 2: write the review from (at most) that many papers.
        if found:
            selected = found[:paper_count]
            return await generate_literature_review_async(http, user_query, selected)

        return "No relevant papers found. Please try a different query."
212
+
213
+ # def main():
214
+ # """CLI entry point."""
215
+ # user_query = input("Enter your research topic/question: ").strip()
216
+ # final_report = asyncio.run(research_flow(user_query))
217
+ # print("\n==== LITERATURE REVIEW ====\n")
218
+ # print(final_report)
219
+
220
+ # if __name__ == "__main__":
221
+ # main()