YashJD committed on
Commit
4eba3d2
·
1 Parent(s): e107ee4
Files changed (7) hide show
  1. extract.py +50 -85
  2. infranew.py +244 -4
  3. new_research_paper.py +269 -10
  4. papers_filtered_export.csv +5 -0
  5. research22.py +41 -8
  6. research_combine2.py +16 -12
  7. sciclone.py +146 -92
extract.py CHANGED
@@ -33,106 +33,71 @@ def call_perplexity_api(prompt: str) -> str:
33
  st.error(f"API Error: {str(e)}")
34
  return ""
35
 
36
- def extract_text_from_pdf(pdf_file):
37
- """Extract text content from a PDF file."""
38
- pdf_reader = PyPDF2.PdfReader(pdf_file)
39
- text = ""
40
- for page in pdf_reader.pages:
41
- text += page.extract_text() + "\n"
42
- return text
43
-
44
- def analyze_paper(text: str, category: str) -> str:
45
- """Generate a prompt and get analysis for a specific category."""
46
- prompts = {
47
- "Summarized Abstract": "Extract and summarize the abstract from this research paper:",
48
- "Results": "What are the main results and findings from this research paper:",
49
- "Summarized Introduction": "Summarize the introduction section of this research paper:",
50
- "Methods Used": "What are the main methods and methodologies used in this research:",
51
- "Literature Survey": "Summarize the literature review or related work from this paper:",
52
- "Limitations": "What are the limitations mentioned in this research:",
53
- "Contributions": "What are the main contributions of this research:",
54
- "Practical Implications": "What are the practical implications of this research:",
55
- "Objectives": "What are the main objectives of this research:",
56
- "Findings": "What are the key findings from this research:",
57
- "Future Research": "What future research directions are suggested in this paper:",
58
- "Dependent Variables": "What are the dependent variables studied in this research:",
59
- "Independent Variables": "What are the independent variables studied in this research:",
60
- "Dataset": "What dataset(s) were used in this research:",
61
- "Problem Statement": "What is the main problem statement or research question:",
62
- "Challenges": "What challenges were faced or addressed in this research:",
63
- "Applications": "What are the potential applications of this research:"
64
- }
65
-
66
- prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}" # Limit text to avoid token limits
67
- return call_perplexity_api(prompt)
68
 
69
  def main():
70
- st.title("Research Paper Analysis Tool")
71
-
72
  # File uploader
73
- uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)
74
-
75
- if uploaded_files:
76
- if st.button("Process Papers"):
77
  # Initialize progress bar
78
  progress_bar = st.progress(0)
79
  status_text = st.empty()
80
-
 
 
 
81
  # Initialize results dictionary
82
  results = []
83
-
84
- # Define categories
85
- categories = [
86
- "Summarized Abstract", "Results", "Summarized Introduction",
87
- "Methods Used", "Literature Survey", "Limitations",
88
- "Contributions", "Practical Implications", "Objectives",
89
- "Findings", "Future Research", "Dependent Variables",
90
- "Independent Variables", "Dataset", "Problem Statement",
91
- "Challenges", "Applications"
92
- ]
93
-
94
- # Process each file
95
- for i, file in enumerate(uploaded_files):
96
- status_text.text(f"Processing {file.name}...")
97
-
98
- # Extract text from PDF
99
- text = extract_text_from_pdf(file)
100
-
101
- # Initialize paper results
102
- paper_results = {"Filename": file.name}
103
-
104
- # Analyze each category
105
- for j, category in enumerate(categories):
106
- status_text.text(f"Processing {file.name} - {category}")
107
- paper_results[category] = analyze_paper(text, category)
108
-
109
- # Update progress
110
- progress = (i * len(categories) + j + 1) / (len(uploaded_files) * len(categories))
111
- progress_bar.progress(progress)
112
-
113
- # Add small delay to avoid API rate limits
114
- time.sleep(1)
115
-
116
- results.append(paper_results)
117
-
118
- # Create DataFrame
119
- df = pd.DataFrame(results)
120
-
121
  # Convert DataFrame to CSV
122
- csv = df.to_csv(index=False)
123
-
124
  # Create download button
125
  st.download_button(
126
  label="Download Results as CSV",
127
  data=csv,
128
- file_name="research_papers_analysis.csv",
129
- mime="text/csv"
130
  )
131
-
132
- # Display results in the app
133
  st.subheader("Analysis Results")
134
- st.dataframe(df)
135
-
 
 
 
 
 
 
 
 
 
 
136
  status_text.text("Processing complete!")
137
  progress_bar.progress(1.0)
138
 
 
33
  st.error(f"API Error: {str(e)}")
34
  return ""
35
 
36
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  def main():
39
+ st.title("Research Corpus Synthesis Tool")
40
+
41
  # File uploader
42
+ uploaded_file = st.file_uploader("Upload CSV file", type="csv")
43
+
44
+ if uploaded_file:
45
+ if st.button("Process CSV"):
46
  # Initialize progress bar
47
  progress_bar = st.progress(0)
48
  status_text = st.empty()
49
+
50
+ # Read CSV file into DataFrame
51
+ df = pd.read_csv(uploaded_file)
52
+
53
  # Initialize results dictionary
54
  results = []
55
+
56
+ # Process each column
57
+ for i, column in enumerate(df.columns):
58
+ status_text.text(f"Processing column: {column}")
59
+
60
+ # Extract text from column
61
+ text = " ".join(df[column].astype(str).tolist())
62
+
63
+ # Generate prompt
64
+ prompt = f"You are a Professional Researcher and Analyser with 10 yrs of Experience.Find details and Elaborate on Top Trends,Theories,Methods,FrameWorks with this topic ({column}):\n\n{text[:5000]}" # Limit text to avoid token limits
65
+
66
+ # Call Perplexity API
67
+ result = call_perplexity_api(prompt)
68
+ results.append({"Column": column, "Result": result})
69
+
70
+ # Update progress
71
+ progress = (i + 1) / len(df.columns)
72
+ progress_bar.progress(progress)
73
+
74
+ # Create DataFrame from results
75
+ results_df = pd.DataFrame(results)
76
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  # Convert DataFrame to CSV
78
+ csv = results_df.to_csv(index=False)
79
+
80
  # Create download button
81
  st.download_button(
82
  label="Download Results as CSV",
83
  data=csv,
84
+ file_name="column_trends_analysis.csv",
85
+ mime="text/csv",
86
  )
87
+
 
88
  st.subheader("Analysis Results")
89
+ styled_df = results_df.style.set_properties(**{
90
+ 'background-color': '#f9f9f9',
91
+ 'color': '#333',
92
+ 'border-color': 'black',
93
+ 'border-width': '1px',
94
+ 'border-style': 'solid',
95
+ 'font-family': 'Arial, sans-serif',
96
+ 'font-size': '14px',
97
+ 'text-align': 'left',
98
+ 'padding': '10px'
99
+ })
100
+ st.dataframe(styled_df)
101
  status_text.text("Processing complete!")
102
  progress_bar.progress(1.0)
103
 
infranew.py CHANGED
@@ -1,3 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import networkx as nx
@@ -6,8 +237,12 @@ from bokeh.plotting import figure, from_networkx
6
  import requests
7
  import json
8
  import google.generativeai as genai
 
 
 
 
9
 
10
- PERPLEXITY_API_KEY = "pplx-REDACTED"
11
  PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
12
 
13
 
@@ -132,7 +367,7 @@ def search_papers(topic: str, num_papers: int) -> list:
132
  return []
133
 
134
 
135
- import os
136
 
137
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
138
  GEMINI_API_URL = "https://api.openai.com/v1/engines/davinci-codex/completions"
@@ -160,7 +395,7 @@ def call_gemini_api(prompt: str) -> str:
160
 
161
 
162
  def generate_gaps_paragraph(gaps):
163
- prompt = f"Generate a brief paragraph about the gaps in the research based on the following gaps: {', '.join(gaps)}"
164
  return call_gemini_api(prompt)
165
 
166
 
@@ -193,7 +428,12 @@ def generate_insights(G, topic):
193
  if gaps_paragraph:
194
  st.write("### Gaps in Research")
195
  st.write(gaps_paragraph)
196
-
 
 
 
 
 
197
 
198
  def main():
199
  st.title("Advanced Interactive Knowledge Graph")
 
1
+ # import streamlit as st
2
+ # import pandas as pd
3
+ # import networkx as nx
4
+ # from bokeh.models import HoverTool
5
+ # from bokeh.plotting import figure, from_networkx
6
+ # import requests
7
+ # import json
8
+ # import google.generativeai as genai
9
+
10
+ # PERPLEXITY_API_KEY = "pplx-REDACTED"
11
+ # PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
12
+
13
+
14
+ # def extract_edges(keywords):
15
+ # keywords = [kw.strip() for kw in keywords.split(",")]
16
+ # edges = [
17
+ # (keywords[i], keywords[j])
18
+ # for i in range(len(keywords))
19
+ # for j in range(i + 1, len(keywords))
20
+ # ]
21
+ # return edges
22
+
23
+
24
+ # def create_knowledge_graph(data):
25
+ # G = nx.Graph()
26
+
27
+ # for _, row in data.iterrows():
28
+ # words = []
29
+ # for col in data.columns:
30
+ # if pd.notnull(row[col]):
31
+ # # Convert to string and handle numeric values
32
+ # cell_value = str(row[col]).strip()
33
+ # if cell_value:
34
+ # words.extend(cell_value.split())
35
+
36
+ # if words:
37
+ # edges = extract_edges(",".join(words))
38
+ # G.add_edges_from(edges)
39
+
40
+ # for word in words:
41
+ # word = word.strip()
42
+ # if word not in G:
43
+ # G.add_node(word, title=word, value=len(word))
44
+
45
+ # return G
46
+
47
+
48
+ # def render_graph_bokeh(G):
49
+ # plot = figure(
50
+ # title="Interactive Knowledge Graph",
51
+ # x_range=(-1.5, 1.5),
52
+ # y_range=(-1.5, 1.5),
53
+ # tools="pan,wheel_zoom,box_zoom,reset,tap",
54
+ # active_scroll="wheel_zoom",
55
+ # )
56
+ # plot.add_tools(HoverTool(tooltips="@index"))
57
+
58
+ # graph_renderer = from_networkx(G, nx.spring_layout, scale=1, center=(0, 0))
59
+
60
+ # graph_renderer.node_renderer.glyph.size = 10
61
+ # graph_renderer.node_renderer.glyph.fill_color = "blue"
62
+ # graph_renderer.node_renderer.glyph.line_color = "black"
63
+
64
+ # graph_renderer.edge_renderer.glyph.line_width = 1
65
+ # graph_renderer.edge_renderer.glyph.line_color = "gray"
66
+
67
+ # plot.renderers.append(graph_renderer)
68
+
69
+ # return plot
70
+
71
+
72
+ # import re
73
+
74
+
75
+ # def search_papers(topic: str, num_papers: int) -> list:
76
+ # headers = {
77
+ # "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
78
+ # "Content-Type": "application/json",
79
+ # }
80
+
81
+ # prompt = f"""Find {num_papers} recent research papers about {topic}.
82
+ # Return ONLY a valid JSON array with the following structure for each paper:
83
+ # [
84
+ # {{
85
+ # "Title": "paper title",
86
+ # "Abstract": "abstract text",
87
+ # "Keywords": "key terms"
88
+ # }}
89
+ # ]"""
90
+
91
+ # payload = {
92
+ # "model": "llama-3.1-sonar-small-128k-chat",
93
+ # "messages": [
94
+ # {
95
+ # "role": "system",
96
+ # "content": "You are a research paper analyzer that returns valid JSON arrays.",
97
+ # },
98
+ # {"role": "user", "content": prompt},
99
+ # ],
100
+ # "temperature": 0.1,
101
+ # }
102
+
103
+ # try:
104
+ # response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
105
+ # response.raise_for_status()
106
+ # content = response.json()["choices"][0]["message"]["content"]
107
+
108
+ # # Clean response to ensure valid JSON
109
+ # content = content.strip()
110
+ # if not content.startswith("["):
111
+ # content = content[content.find("[") :]
112
+ # if not content.endswith("]"):
113
+ # content = content[: content.rfind("]") + 1]
114
+
115
+ # # Remove any trailing commas before closing brackets
116
+ # content = re.sub(r",\s*]", "]", content)
117
+ # content = re.sub(r",\s*}", "}", content)
118
+
119
+ # papers = json.loads(content)
120
+ # if not isinstance(papers, list):
121
+ # raise ValueError("Response is not a JSON array")
122
+ # return papers
123
+ # except requests.exceptions.RequestException as e:
124
+ # st.error(f"API Request Error: {str(e)}")
125
+ # return []
126
+ # except json.JSONDecodeError as e:
127
+ # st.error(f"Invalid JSON response: {str(e)}")
128
+ # st.error(f"Response content: {response.text}")
129
+ # return []
130
+ # except ValueError as e:
131
+ # st.error(f"Error: {str(e)}")
132
+ # return []
133
+
134
+
135
+ # import os
136
+
137
+ # GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
138
+ # GEMINI_API_URL = "https://api.openai.com/v1/engines/davinci-codex/completions"
139
+
140
+
141
+ # def call_gemini_api(prompt: str) -> str:
142
+ # headers = {
143
+ # "Authorization": f"Bearer {GEMINI_API_KEY}",
144
+ # "Content-Type": "application/json",
145
+ # }
146
+
147
+ # payload = {
148
+ # "prompt": prompt,
149
+ # "max_tokens": 150,
150
+ # "temperature": 0.7,
151
+ # }
152
+
153
+ # try:
154
+ # model = genai.GenerativeModel("gemini-pro")
155
+ # response = model.generate_content(prompt)
156
+ # return response.text
157
+ # except Exception as e:
158
+ # st.error(f"Gemini API Error: {str(e)}")
159
+ # return ""
160
+
161
+
162
+ # def generate_gaps_paragraph(gaps):
163
+ # prompt = f"Generate a brief paragraph about the gaps in the research based on the following gaps: {', '.join(gaps)}"
164
+ # return call_gemini_api(prompt)
165
+
166
+
167
+ # def generate_insights(G, topic):
168
+ # papers = search_papers(topic, 5)
169
+ # if papers:
170
+ # st.write("### Research Insights from Perplexity API")
171
+ # for paper in papers:
172
+ # st.write(f"**Title:** {paper['Title']}")
173
+ # st.write(f"**Abstract:** {paper['Abstract']}")
174
+ # st.write(f"**Keywords:** {paper['Keywords']}")
175
+ # st.write("---")
176
+
177
+ # nodes = list(G.nodes(data=True))
178
+ # insights = {}
179
+ # insights["Strong Points"] = [
180
+ # n for n, d in nodes if G.degree(n) > len(G.nodes) * 0.1
181
+ # ]
182
+ # insights["Weak Points"] = [n for n, d in nodes if G.degree(n) < len(G.nodes) * 0.05]
183
+ # insights["Gaps"] = [n for n, d in nodes if len(list(nx.neighbors(G, n))) == 0]
184
+
185
+ # st.write("### Graph-Based Insights")
186
+ # st.write("**Strong Points:**", insights["Strong Points"])
187
+ # st.write("**Weak Points:**", insights["Weak Points"])
188
+ # st.write("**Gaps:**", insights["Gaps"])
189
+
190
+ # if insights["Gaps"]:
191
+ # with st.spinner("Generating insights about gaps..."):
192
+ # gaps_paragraph = generate_gaps_paragraph(insights["Gaps"])
193
+ # if gaps_paragraph:
194
+ # st.write("### Gaps in Research")
195
+ # st.write(gaps_paragraph)
196
+
197
+
198
+ # def main():
199
+ # st.title("Advanced Interactive Knowledge Graph")
200
+ # st.write(
201
+ # "Upload a CSV file to generate a fully interactive and insightful knowledge graph."
202
+ # )
203
+
204
+ # uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
205
+
206
+ # if uploaded_file is not None:
207
+ # try:
208
+ # data = pd.read_csv(uploaded_file)
209
+ # st.write("Preview of the uploaded data:")
210
+ # st.dataframe(data.head())
211
+
212
+ # G = create_knowledge_graph(data)
213
+
214
+ # st.write("Generated Knowledge Graph:")
215
+ # plot = render_graph_bokeh(G)
216
+ # st.bokeh_chart(plot, use_container_width=True)
217
+
218
+ # topic = st.text_input(
219
+ # "Enter a topic for additional insights:", "knowledge graphs"
220
+ # )
221
+ # if topic:
222
+ # generate_insights(G, topic)
223
+
224
+ # except Exception as e:
225
+ # st.error(f"An error occurred while processing the file: {e}")
226
+ # else:
227
+ # st.info("Please upload a CSV file to get started.")
228
+
229
+
230
+ # if __name__ == "__main__":
231
+ # main()
232
  import streamlit as st
233
  import pandas as pd
234
  import networkx as nx
 
237
  import requests
238
  import json
239
  import google.generativeai as genai
240
+ from dotenv import load_dotenv
241
+ import os
242
+
243
+ load_dotenv()
244
 
245
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
246
  PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
247
 
248
 
 
367
  return []
368
 
369
 
370
+
371
 
372
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
373
  GEMINI_API_URL = "https://api.openai.com/v1/engines/davinci-codex/completions"
 
395
 
396
 
397
  def generate_gaps_paragraph(gaps):
398
+ prompt = f"Generate a brief paragraph about the gaps in the research based on the following gaps provide evidence-based(how did you deduce this) recommendations for new research paper ideas based on these gaps, Justify your recommendations with evidence from the data it analyzes. Give atleast 10 new research paper ideas based on the gaps and 500 words gap analysis,Give a different table for new research ideas with evidence that why did you reccommend it, In the new Table Containing Research Paper Ideas there should be 4 columns - New Research Paper Idea/Title,Evidence , Methodology(How will we approach the ideo new research paper), Evidence Of How Methodology was obtained: {', '.join(gaps)}"
399
  return call_gemini_api(prompt)
400
 
401
 
 
428
  if gaps_paragraph:
429
  st.write("### Gaps in Research")
430
  st.write(gaps_paragraph)
431
+ st.download_button(
432
+ label="Download Gaps Analysis as Text",
433
+ data=gaps_paragraph,
434
+ file_name="gaps_analysis.txt",
435
+ mime="text/plain",
436
+ )
437
 
438
  def main():
439
  st.title("Advanced Interactive Knowledge Graph")
new_research_paper.py CHANGED
@@ -1,3 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import requests
@@ -33,7 +136,75 @@ def call_perplexity_api(prompt: str) -> str:
33
  return ""
34
 
35
 
36
- def generate_research_paper(df: pd.DataFrame) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  """
38
  For each column in the DataFrame, generate a research paper section (200-500 words)
39
  that addresses the data in that column. Return a dict mapping column -> text.
@@ -43,9 +214,14 @@ def generate_research_paper(df: pd.DataFrame) -> dict:
43
  # Convert all non-null rows in the column to strings and join them for context
44
  col_values = df[col].dropna().astype(str).tolist()
45
  # We'll truncate if this is huge
 
46
  sample_text = " | ".join(col_values[:50]) # limit to first 50 rows for brevity
47
  prompt = f"""
48
- Topic: {col}
 
 
 
 
49
  Data Sample: {sample_text}
50
 
51
  Generate a professional research paper section for the above column.
@@ -58,20 +234,31 @@ def generate_research_paper(df: pd.DataFrame) -> dict:
58
  return paper_sections
59
 
60
 
61
- def format_paper(paper_dict: dict) -> str:
62
  """
63
  Format the generated paper into a Markdown string.
64
- Each column name is used as a heading, and the text is placed under it.
 
65
  """
66
- md_text = "# Generated Research Paper\n\n"
 
 
67
  for col, content in paper_dict.items():
68
- md_text += f"## {col}\n{content}\n\n"
69
  return md_text
70
 
71
 
72
- def main():
73
  st.title("Corpus-based Research Paper Generator")
74
 
 
 
 
 
 
 
 
 
75
  uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
76
  if uploaded_file:
77
  df = pd.read_csv(uploaded_file)
@@ -81,9 +268,9 @@ def main():
81
  if st.button("Generate Research Paper"):
82
  st.info("Generating paper based on the columns of your corpus...")
83
  with st.spinner("Calling Perplexity AI..."):
84
- paper = generate_research_paper(df)
85
  if paper:
86
- formatted_paper = format_paper(paper)
87
  st.success("Research Paper Generated Successfully!")
88
  st.write(formatted_paper)
89
 
@@ -98,6 +285,78 @@ def main():
98
  "Paper generation failed. Please check Perplexity API key."
99
  )
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  if __name__ == "__main__":
103
- main()
 
1
+ # import streamlit as st
2
+ # import pandas as pd
3
+ # import requests
4
+ # import json
5
+ # import os
6
+ # from dotenv import load_dotenv
7
+
8
+ # # Load environment variables
9
+ # load_dotenv()
10
+ # PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
11
+ # PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
12
+
13
+
14
+ # def call_perplexity_api(prompt: str) -> str:
15
+ # """Call Perplexity AI with a prompt, return the text response if successful."""
16
+ # headers = {
17
+ # "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
18
+ # "Content-Type": "application/json",
19
+ # }
20
+
21
+ # payload = {
22
+ # "model": "llama-3.1-sonar-small-128k-chat",
23
+ # "messages": [{"role": "user", "content": prompt}],
24
+ # "temperature": 0.3,
25
+ # }
26
+
27
+ # try:
28
+ # response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
29
+ # response.raise_for_status()
30
+ # return response.json()["choices"][0]["message"]["content"]
31
+ # except Exception as e:
32
+ # st.error(f"API Error: {str(e)}")
33
+ # return ""
34
+
35
+
36
+ # def generate_research_paper(df: pd.DataFrame) -> dict:
37
+ # """
38
+ # For each column in the DataFrame, generate a research paper section (200-500 words)
39
+ # that addresses the data in that column. Return a dict mapping column -> text.
40
+ # """
41
+ # paper_sections = {}
42
+ # for col in df.columns:
43
+ # # Convert all non-null rows in the column to strings and join them for context
44
+ # col_values = df[col].dropna().astype(str).tolist()
45
+ # # We'll truncate if this is huge
46
+ # sample_text = " | ".join(col_values[:50]) # limit to first 50 rows for brevity
47
+ # prompt = f"""
48
+ # Topic: {col}
49
+ # Data Sample: {sample_text}
50
+
51
+ # Generate a professional research paper section for the above column.
52
+ # The section should be at least 100 words and at most 150 words,
53
+ # focusing on key insights, challenges, and potential research angles.
54
+ # Integrate the data samples as context for the content.
55
+ # """
56
+ # section_text = call_perplexity_api(prompt)
57
+ # paper_sections[col] = section_text.strip() if section_text else ""
58
+ # return paper_sections
59
+
60
+
61
+ # def format_paper(paper_dict: dict) -> str:
62
+ # """
63
+ # Format the generated paper into a Markdown string.
64
+ # Each column name is used as a heading, and the text is placed under it.
65
+ # """
66
+ # md_text = "# Generated Research Paper\n\n"
67
+ # for col, content in paper_dict.items():
68
+ # md_text += f"## {col}\n{content}\n\n"
69
+ # return md_text
70
+
71
+
72
+ # def main():
73
+ # st.title("Corpus-based Research Paper Generator")
74
+
75
+ # uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
76
+ # if uploaded_file:
77
+ # df = pd.read_csv(uploaded_file)
78
+ # st.write("### Preview of Uploaded Data")
79
+ # st.dataframe(df.head())
80
+
81
+ # if st.button("Generate Research Paper"):
82
+ # st.info("Generating paper based on the columns of your corpus...")
83
+ # with st.spinner("Calling Perplexity AI..."):
84
+ # paper = generate_research_paper(df)
85
+ # if paper:
86
+ # formatted_paper = format_paper(paper)
87
+ # st.success("Research Paper Generated Successfully!")
88
+ # st.write(formatted_paper)
89
+
90
+ # st.download_button(
91
+ # label="Download Paper as Markdown",
92
+ # data=formatted_paper,
93
+ # file_name="research_paper.md",
94
+ # mime="text/markdown",
95
+ # )
96
+ # else:
97
+ # st.error(
98
+ # "Paper generation failed. Please check Perplexity API key."
99
+ # )
100
+
101
+
102
+ # if __name__ == "__main__":
103
+ # main()
104
  import streamlit as st
105
  import pandas as pd
106
  import requests
 
136
  return ""
137
 
138
 
139
+ # def generate_research_paper(df: pd.DataFrame) -> dict:
140
+ # """
141
+ # For each column in the DataFrame, generate a research paper section (200-500 words)
142
+ # that addresses the data in that column. Return a dict mapping column -> text.
143
+ # """
144
+ # paper_sections = {}
145
+ # for col in df.columns:
146
+ # # Convert all non-null rows in the column to strings and join them for context
147
+ # col_values = df[col].dropna().astype(str).tolist()
148
+ # # We'll truncate if this is huge
149
+ # sample_text = " | ".join(col_values[:50]) # limit to first 50 rows for brevity
150
+ # prompt = f"""
151
+ # Topic: {col}
152
+ # Data Sample: {sample_text}
153
+
154
+ # Generate a professional research paper section for the above column.
155
+ # The section should be at least 100 words and at most 150 words,
156
+ # focusing on key insights, challenges, and potential research angles.
157
+ # Integrate the data samples as context for the content.
158
+ # """
159
+ # section_text = call_perplexity_api(prompt)
160
+ # paper_sections[col] = section_text.strip() if section_text else ""
161
+ # return paper_sections
162
+
163
+
164
+ # def format_paper(paper_dict: dict) -> str:
165
+ # """
166
+ # Format the generated paper into a Markdown string.
167
+ # Each column name is used as a heading, and the text is placed under it.
168
+ # """
169
+ # md_text = "# Generated Research Paper\n\n"
170
+ # for col, content in paper_dict.items():
171
+ # md_text += f"## {col}\n{content}\n\n"
172
+ # return md_text
173
+
174
+
175
+ # def main():
176
+ # st.title("Corpus-based Research Paper Generator")
177
+
178
+ # uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
179
+ # if uploaded_file:
180
+ # df = pd.read_csv(uploaded_file)
181
+ # st.write("### Preview of Uploaded Data")
182
+ # st.dataframe(df.head())
183
+
184
+ # if st.button("Generate Research Paper"):
185
+ # st.info("Generating paper based on the columns of your corpus...")
186
+ # with st.spinner("Calling Perplexity AI..."):
187
+ # paper = generate_research_paper(df)
188
+ # if paper:
189
+ # formatted_paper = format_paper(paper)
190
+ # st.success("Research Paper Generated Successfully!")
191
+ # st.write(formatted_paper)
192
+
193
+ # st.download_button(
194
+ # label="Download Paper as Markdown",
195
+ # data=formatted_paper,
196
+ # file_name="research_paper.md",
197
+ # mime="text/markdown",
198
+ # )
199
+ # else:
200
+ # st.error(
201
+ # "Paper generation failed. Please check Perplexity API key."
202
+ # )
203
+
204
+
205
+ # if __name__ == "__main__":
206
+ # main()
207
+ #def generate_research_paper(df: pd.DataFrame, gaps_analysis: str, topic: str, journal: str, format: str) -> dict:
208
  """
209
  For each column in the DataFrame, generate a research paper section (200-500 words)
210
  that addresses the data in that column. Return a dict mapping column -> text.
 
214
  # Convert all non-null rows in the column to strings and join them for context
215
  col_values = df[col].dropna().astype(str).tolist()
216
  # We'll truncate if this is huge
217
+ print(col)
218
  sample_text = " | ".join(col_values[:50]) # limit to first 50 rows for brevity
219
  prompt = f"""
220
+ Topic: {topic}
221
+ Journal/Conference: {journal}
222
+ Format: {format}
223
+ Gaps Analysis: {gaps_analysis}
224
+ Column: {col}
225
  Data Sample: {sample_text}
226
 
227
  Generate a professional research paper section for the above column.
 
234
  return paper_sections
235
 
236
 
237
+ #def format_paper(paper_dict: dict, topic: str, journal: str, format: str) -> str:
238
  """
239
  Format the generated paper into a Markdown string.
240
+ Add the topic, journal, and format as the main title, each column name as a heading,
241
+ and the corresponding text as paragraph content.
242
  """
243
+ md_text = f"# Research Paper on: {topic}\n\n"
244
+ md_text += f"## Journal/Conference: {journal}\n\n"
245
+ md_text += f"## Format: {format}\n\n"
246
  for col, content in paper_dict.items():
247
+ md_text += f"### {col}\n{content}\n\n"
248
  return md_text
249
 
250
 
251
+ #def main():
252
  st.title("Corpus-based Research Paper Generator")
253
 
254
+ topic_input = st.text_input("Enter the topic for the research paper:")
255
+ journal_input = st.text_input("Enter the Journal/Conference aimed to publish:")
256
+ format_input = st.text_input("Enter the format of the research paper:")
257
+ gaps_analysis_file = st.file_uploader("Upload Gaps Analysis (.txt file)", type="txt")
258
+ gaps_analysis = ""
259
+ if gaps_analysis_file:
260
+ gaps_analysis = gaps_analysis_file.getvalue().decode("utf-8")
261
+
262
  uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
263
  if uploaded_file:
264
  df = pd.read_csv(uploaded_file)
 
268
  if st.button("Generate Research Paper"):
269
  st.info("Generating paper based on the columns of your corpus...")
270
  with st.spinner("Calling Perplexity AI..."):
271
+ paper = generate_research_paper(df, gaps_analysis, topic_input, journal_input, format_input)
272
  if paper:
273
+ formatted_paper = format_paper(paper, topic_input, journal_input, format_input)
274
  st.success("Research Paper Generated Successfully!")
275
  st.write(formatted_paper)
276
 
 
285
  "Paper generation failed. Please check Perplexity API key."
286
  )
287
 
288
+ def generate_research_paper(df: pd.DataFrame, gaps_analysis: str, topic: str, journal: str, format: str) -> str:
289
+ """
290
+ Generate a research paper based on the entire DataFrame, the topic, journal, and format.
291
+ """
292
+ # Convert the entire DataFrame to a string
293
+ df_string = df.to_string(index=False)
294
+
295
+ # Create the prompt
296
+ prompt = f"""
297
+ Topic: {topic}
298
+ Journal/Conference: {journal}
299
+ Format: {format}
300
+ Gaps Analysis: {gaps_analysis}
301
+ Data:
302
+ {df_string}
303
+
304
+ Generate a professional research paper based on the above data.
305
+ The paper should be well-structured, focusing on key insights, challenges, and potential research angles.
306
+ Use the Gaps Analysis to identify areas for improvement and future work and fill the gaps in the new paper.
307
+ Use the data as a reference to support your arguments, dont directly copy the data.
308
+ Ensure the paper is formatted according to the specified journal/conference format.
309
+ """
310
+
311
+ # Call the Perplexity API
312
+ paper_text = call_perplexity_api(prompt)
313
+ return paper_text.strip() if paper_text else ""
314
+
315
+ def format_paper(paper_text: str, topic: str, journal: str, format: str) -> str:
316
+ """
317
+ Format the generated paper into a Markdown string.
318
+ Add the topic, journal, and format as the main title, and the paper text as content.
319
+ """
320
+ md_text = f"# Research Paper on: {topic}\n\n"
321
+ md_text += paper_text
322
+ return md_text
323
+
324
+ def main():
325
+ st.title("Corpus-based Research Paper Generator")
326
+
327
+ topic_input = st.text_input("Enter the topic for the research paper:")
328
+ journal_input = st.text_input("Enter the Journal/Conference aimed to publish:")
329
+ format_input = st.text_input("Enter the format of the research paper:")
330
+ gaps_analysis_file = st.file_uploader("Upload Gaps Analysis (.txt file)", type="txt")
331
+ gaps_analysis = ""
332
+ if gaps_analysis_file:
333
+ gaps_analysis = gaps_analysis_file.getvalue().decode("utf-8")
334
+
335
+ uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
336
+ if uploaded_file:
337
+ df = pd.read_csv(uploaded_file)
338
+ st.write("### Preview of Uploaded Data")
339
+ st.dataframe(df.head())
340
+
341
+ if st.button("Generate Research Paper"):
342
+ st.info("Generating paper based on the columns of your corpus...")
343
+ with st.spinner("Calling Perplexity AI..."):
344
+ paper_text = generate_research_paper(df, gaps_analysis, topic_input, journal_input, format_input)
345
+ if paper_text:
346
+ formatted_paper = format_paper(paper_text, topic_input, journal_input, format_input)
347
+ st.success("Research Paper Generated Successfully!")
348
+ st.write(formatted_paper)
349
+
350
+ st.download_button(
351
+ label="Download Paper as Markdown",
352
+ data=formatted_paper,
353
+ file_name="research_paper.md",
354
+ mime="text/markdown",
355
+ )
356
+ else:
357
+ st.error(
358
+ "Paper generation failed. Please check Perplexity API key."
359
+ )
360
 
361
  if __name__ == "__main__":
362
+ main()
papers_filtered_export.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ _id,Title,Publication,Journal_Conference,Abstract,Keywords,Author,Date_of_Publication,Intro,Literature_Review,Introduction,Body,Methodology,Participants,Survey Instrument,Data Collection,Data Analysis,Results and Discussion,Conclusion,References
2
+ 6783960b5c01fc5ec3a02bcc,Enhancing Student Engagement in GenAI Education: A FlipClassroom Approach,Journal of Educational Technology,Journal of Educational Technology,"This study explores the integration of Generative AI (GenAI) into a flipped classroom setting to enhance student engagement in educational technology courses. The authors designed an experiment where students used GenAI tools to create interactive learning materials before class, which were then discussed in class. The results showed significant improvements in student participation and understanding.","GenAI, flipped classroom, educational technology, student engagement","John Doe, Jane Smith",2023-02-15,The integration of GenAI into educational settings has been gaining attention due to its potential to enhance learning experiences.,"Previous studies have shown that flipped classrooms can improve student engagement, but the addition of GenAI tools has not been extensively explored.",This study aims to investigate whether incorporating GenAI into a flipped classroom setting can further enhance student engagement.,The study involved 100 students divided into two groups: one using traditional methods and the other using GenAI tools.,"Students in the GenAI group created interactive materials using AI tools before class, while those in the traditional group used standard textbooks.",100 students from a university in the United States,A survey was administered at the end of the semester to assess student engagement and understanding.,Data was collected through surveys and class participation observations.,Descriptive statistics were used to analyze survey responses and class participation data.,The results showed that students in the GenAI group had higher participation rates and better understanding of the material compared to those in the traditional group.,The integration of GenAI into a flipped classroom setting can significantly enhance student engagement and understanding in educational 
technology courses.,"{'Author': 'Doe, J.', 'Year': '2023', 'Title': 'Enhancing Student Engagement in GenAI Education'}, {'Author': 'Smith, J.', 'Year': '2022', 'Title': 'Flipped Classrooms and Educational Technology'}"
3
+ 6783960b5c01fc5ec3a02bcd,Assessing the Impact of GenAI on Student Outcomes in a Flipped Classroom Environment,Journal of Educational Innovation,Journal of Educational Innovation,"This study evaluates the impact of integrating Generative AI (GenAI) into a flipped classroom environment on student outcomes. The authors conducted an experiment where students used GenAI tools to create personalized learning materials, which were then discussed in class. The results indicate that GenAI integration led to improved student performance and increased student satisfaction.","GenAI, flipped classroom, student outcomes, educational innovation","Jane Doe, John Smith",2023-05-01,The integration of GenAI into educational settings has been gaining attention due to its potential to personalize learning experiences.,"Previous studies have shown that flipped classrooms can improve student outcomes, but the addition of GenAI tools has not been extensively explored.",This study aims to investigate whether incorporating GenAI into a flipped classroom setting can improve student performance and satisfaction.,The study involved 150 students divided into two groups: one using traditional methods and the other using GenAI tools.,"Students in the GenAI group created personalized materials using AI tools before class, while those in the traditional group used standard textbooks.",150 students from a university in the United States,A survey was administered at the end of the semester to assess student satisfaction and performance.,Data was collected through surveys and class participation observations.,Descriptive statistics were used to analyze survey responses and class participation data.,The results showed that students in the GenAI group had higher performance rates and better satisfaction compared to those in the traditional group.,The integration of GenAI into a flipped classroom setting can significantly improve student performance and satisfaction in educational technology 
courses.,"{'Author': 'Doe, J.', 'Year': '2023', 'Title': 'Assessing the Impact of GenAI on Student Outcomes'}, {'Author': 'Smith, J.', 'Year': '2022', 'Title': 'Flipped Classrooms and Educational Innovation'}"
4
+ 678396345c01fc5ec3a02bd0,Exploring the Impact of GenAI on the FlipClassroom Model: A Case Study,Journal of Educational Technology,Journal of Educational Technology,"This study investigates the integration of Generative AI (GenAI) into the flipped classroom model. We examine how GenAI can enhance student engagement, improve learning outcomes, and reduce teacher workload.","GenAI, flipped classroom, educational technology, student engagement, learning outcomes","John Doe, Jane Smith",2023-02-15,"The flipped classroom model has gained popularity in recent years due to its potential to improve student learning outcomes. However, it also presents challenges such as increased teacher workload and limited student engagement.",A comprehensive review of existing literature on the flipped classroom model and its integration with GenAI is provided.,This study aims to explore the impact of GenAI on the flipped classroom model by examining its effects on student engagement and learning outcomes.,The study employed a mixed-methods approach combining both qualitative and quantitative data collection methods.,A total of 100 students participated in this study. 
They were divided into two groups: one using traditional teaching methods and the other using GenAI-enhanced materials.,100 students from a local university,A survey was administered to gather data on student perceptions and experiences.,"Data was collected through surveys, interviews, and observation.",Qualitative data was analyzed using thematic analysis while quantitative data was analyzed using statistical methods.,The results show significant improvements in student engagement and learning outcomes when GenAI is integrated into the flipped classroom model.,The integration of GenAI into the flipped classroom model can significantly enhance student engagement and improve learning outcomes.,"{'Author': 'Doe, J.', 'Year': '2023', 'Title': 'GenAI in Education'}, {'Author': 'Smith, J.', 'Year': '2022', 'Title': 'Flipped Classroom Models'}"
5
+ 678396345c01fc5ec3a02bd1,Enhancing the Flipped Classroom with Generative AI: A Pilot Study,International Journal of Educational Research,International Journal of Educational Research,This pilot study explores the potential of Generative AI (GenAI) to enhance the flipped classroom model by automating content creation and providing personalized learning experiences.,"GenAI, flipped classroom, educational technology, personalized learning","Jane Doe, John Smith",2023-05-01,The flipped classroom model has shown promise in improving student learning outcomes but faces challenges related to content creation and personalization.,A review of existing literature on GenAI applications in education is provided.,This study aims to pilot-test the integration of GenAI into the flipped classroom model focusing on its potential for automating content creation and providing personalized learning experiences.,The study employed an experimental design where one group received GenAI-generated materials while another group received traditional materials.,A total of 50 students participated in this pilot study. They were divided into two groups based on their exposure to GenAI-generated materials.,50 students from a local high school,A survey was administered post-intervention to gather feedback from participants.,Data was collected through surveys and observation.,Qualitative data was analyzed using thematic analysis while quantitative data was analyzed using statistical methods.,The results indicate that GenAI can effectively automate content creation and provide personalized learning experiences within the flipped classroom model.,The integration of GenAI into the flipped classroom model shows promise for enhancing student engagement and improving learning outcomes through automation and personalization.,"{'Author': 'Doe, J.', 'Year': '2023', 'Title': 'GenAI in Education'}, {'Author': 'Smith, J.', 'Year': '2022', 'Title': 'Flipped Classroom Models'}"
research22.py CHANGED
@@ -10,6 +10,7 @@ from dotenv import load_dotenv
10
  import os
11
  import json
12
  import re
 
13
 
14
  # --------------------------------------------------------------------------------
15
  # 1. Environment Setup
@@ -20,6 +21,8 @@ MONGODB_URI = os.getenv(
20
  "MONGODB_UR",
21
  "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
22
  )
 
 
23
  # Gemini
24
  GEMINI_KEY = os.getenv("GEMINI_KEY", "AIzaSyCFIvntck54HOCS5pxxiy9wpr5HJN3r02I")
25
 
@@ -27,6 +30,36 @@ GEMINI_KEY = os.getenv("GEMINI_KEY", "AIzaSyCFIvntck54HOCS5pxxiy9wpr5HJN3r02I")
27
  genai.configure(api_key=GEMINI_KEY)
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  # --------------------------------------------------------------------------------
31
  # 2. Database Connection
32
  # --------------------------------------------------------------------------------
@@ -67,13 +100,13 @@ def extract_text_from_pdf(pdf_file) -> str:
67
  # --------------------------------------------------------------------------------
68
  # 4. Gemini Response Helper
69
  # --------------------------------------------------------------------------------
70
- def get_gemini_response(prompt: str) -> str:
71
  """
72
  Sends a prompt to Google's Gemini model and returns the response text.
73
  Adjust this function as needed for your generative AI usage.
74
  """
75
  try:
76
- model = genai.GenerativeModel("gemini-pro")
77
  response = model.generate_content(prompt)
78
  return response.text
79
  except Exception as e:
@@ -112,7 +145,7 @@ def extract_basic_info(text: str) -> Dict[str, str]:
112
  Author: ...
113
  Date_of_Publication: ...
114
  """
115
- response = get_gemini_response(prompt)
116
  if not response:
117
  return {}
118
  info = {}
@@ -153,7 +186,7 @@ def extract_content_sections(text: str) -> Dict[str, str]:
153
  Future_Scope: <text>
154
  Theory: <text>
155
  """
156
- response = get_gemini_response(prompt)
157
  if not response:
158
  return {}
159
  sections = {}
@@ -196,7 +229,7 @@ def extract_variables(text: str) -> Dict[str, Any]:
196
 
197
  Paper text: {text}
198
  """
199
- response = get_gemini_response(prompt)
200
  if not response:
201
  return {}
202
  variables = {}
@@ -394,7 +427,7 @@ def extract_paper_fields(text: str, paper_type: str) -> Dict[str, Any]:
394
  Paper text:
395
  {text}
396
 
397
- Return them in this JSON format strictly, with no extra text:
398
  [
399
  {{
400
  {", ".join([f'"{attr}": "value"' for attr in selected_attrs])}
@@ -403,7 +436,7 @@ def extract_paper_fields(text: str, paper_type: str) -> Dict[str, Any]:
403
  """
404
 
405
  try:
406
- response = get_gemini_response(prompt)
407
  if not response:
408
  st.error("No response from Gemini.")
409
  return {}
@@ -451,7 +484,7 @@ def process_paper(text: str, paper_type: str):
451
  the corresponding MongoDB collection.
452
  """
453
  db = create_db_connection()
454
- if not db:
455
  return
456
 
457
  # Determine collection name
 
10
  import os
11
  import json
12
  import re
13
+ import requests
14
 
15
  # --------------------------------------------------------------------------------
16
  # 1. Environment Setup
 
21
  "MONGODB_UR",
22
  "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
23
  )
24
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
25
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
26
  # Gemini
27
  GEMINI_KEY = os.getenv("GEMINI_KEY", "AIzaSyCFIvntck54HOCS5pxxiy9wpr5HJN3r02I")
28
 
 
30
  genai.configure(api_key=GEMINI_KEY)
31
 
32
 
33
+ def call_perplexity_api(prompt: str) -> str:
34
+ """
35
+ Call Perplexity AI with a prompt, returning the text response if successful.
36
+ """
37
+ headers = {
38
+ "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
39
+ "Content-Type": "application/json",
40
+ }
41
+ payload = {
42
+ "model": "llama-3.1-sonar-small-128k-chat",
43
+ "messages": [{"role": "user", "content": prompt}],
44
+ "temperature": 0.3,
45
+ }
46
+
47
+ try:
48
+ response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
49
+ response.raise_for_status()
50
+ return response.json()["choices"][0]["message"]["content"]
51
+ except Exception as e:
52
+ st.error(f"Perplexity API Error: {str(e)}")
53
+ return ""
54
+
55
+
56
+ def get_perplexity_response(prompt: str) -> str:
57
+ """
58
+ Wrapper that calls call_perplexity_api, mimicking the old gemini function name signature.
59
+ """
60
+ return call_perplexity_api(prompt)
61
+
62
+
63
  # --------------------------------------------------------------------------------
64
  # 2. Database Connection
65
  # --------------------------------------------------------------------------------
 
100
  # --------------------------------------------------------------------------------
101
  # 4. Gemini Response Helper
102
  # --------------------------------------------------------------------------------
103
+ def get_perplexity_response(prompt: str) -> str:
104
  """
105
  Sends a prompt to Google's Gemini model and returns the response text.
106
  Adjust this function as needed for your generative AI usage.
107
  """
108
  try:
109
+ model = genai.GenerativeModel("gemini-1.5-pro")
110
  response = model.generate_content(prompt)
111
  return response.text
112
  except Exception as e:
 
145
  Author: ...
146
  Date_of_Publication: ...
147
  """
148
+ response = get_perplexity_response(prompt)
149
  if not response:
150
  return {}
151
  info = {}
 
186
  Future_Scope: <text>
187
  Theory: <text>
188
  """
189
+ response = get_perplexity_response(prompt)
190
  if not response:
191
  return {}
192
  sections = {}
 
229
 
230
  Paper text: {text}
231
  """
232
+ response = get_perplexity_response(prompt)
233
  if not response:
234
  return {}
235
  variables = {}
 
427
  Paper text:
428
  {text}
429
 
430
+ Return them in this JSON format strictly, with no extra text, and strictly don't start the JSON with a newline or markdown and don't have Unterminated string:
431
  [
432
  {{
433
  {", ".join([f'"{attr}": "value"' for attr in selected_attrs])}
 
436
  """
437
 
438
  try:
439
+ response = get_perplexity_response(prompt)
440
  if not response:
441
  st.error("No response from Gemini.")
442
  return {}
 
484
  the corresponding MongoDB collection.
485
  """
486
  db = create_db_connection()
487
+ if db is None:
488
  return
489
 
490
  # Determine collection name
research_combine2.py CHANGED
@@ -193,12 +193,16 @@ def display_research_assistant_dashboard():
193
  "Knowledge Graph",
194
  "Cosine Similarity",
195
  "Paper Generator",
196
- "Paper from Topic",
197
  "Download Entire Corpus",
198
  "Research Copilot",
199
- "Research Paper Analysis Tool",
200
  ],
201
  )
 
 
 
 
202
 
203
  if option == "Search Papers":
204
  st.subheader("Search and Store Papers")
@@ -236,13 +240,14 @@ def display_research_assistant_dashboard():
236
  st.warning("Please enter a research topic")
237
 
238
  # Add MongoDB connection status
239
- if st.sidebar.button("Check Database Connection"):
240
- try:
241
- client.admin.command("ping")
242
- print(MONGODB_URI)
243
- st.sidebar.success("Connected to MongoDB")
244
- except Exception as e:
245
- st.sidebar.error(f"MongoDB Connection Error: {str(e)}")
 
246
  elif option == "Single Keyword Search":
247
  keywords_database_download.main()
248
  elif option == "Multiple Keywords Search":
@@ -253,13 +258,12 @@ def display_research_assistant_dashboard():
253
  loldude.main()
254
  elif option == "Paper Generator":
255
  new_research_paper.main()
256
- elif option == "Paper from Topic":
257
- research3.main()
258
  elif option == "Download Entire Corpus":
259
  entire_download.main()
260
  elif option == "Research Copilot":
261
  sciclone.main()
262
- elif option == "Research Paper Analysis Tool":
263
  extract.main()
264
  else:
265
  research22.main()
 
193
  "Knowledge Graph",
194
  "Cosine Similarity",
195
  "Paper Generator",
196
+
197
  "Download Entire Corpus",
198
  "Research Copilot",
199
+ "Research Corpus Synthesis Tool",
200
  ],
201
  )
202
+ if st.sidebar.button("Logout", use_container_width=True):
203
+ for key in st.session_state.keys():
204
+ del st.session_state[key]
205
+ st.rerun()
206
 
207
  if option == "Search Papers":
208
  st.subheader("Search and Store Papers")
 
240
  st.warning("Please enter a research topic")
241
 
242
  # Add MongoDB connection status
243
+ # if st.sidebar.button("Check Database Connection"):
244
+ # try:
245
+ # client.admin.command("ping")
246
+ # print(MONGODB_URI)
247
+ # st.sidebar.success("Connected to MongoDB")
248
+ # except Exception as e:
249
+ # st.sidebar.error(f"MongoDB Connection Error: {str(e)}")
250
+
251
  elif option == "Single Keyword Search":
252
  keywords_database_download.main()
253
  elif option == "Multiple Keywords Search":
 
258
  loldude.main()
259
  elif option == "Paper Generator":
260
  new_research_paper.main()
261
+
 
262
  elif option == "Download Entire Corpus":
263
  entire_download.main()
264
  elif option == "Research Copilot":
265
  sciclone.main()
266
+ elif option == "Research Corpus Synthesis Tool":
267
  extract.main()
268
  else:
269
  research22.main()
sciclone.py CHANGED
@@ -54,26 +54,28 @@ def extract_text_from_pdf(pdf_file):
54
  def analyze_paper(text: str, category: str) -> str:
55
  """Generate a prompt and get analysis for a specific category."""
56
  prompts = {
57
- "Summarized Abstract": "Extract and summarize the abstract from this research paper:",
58
- "Results": "What are the main results and findings from this research paper:",
59
- "Summarized Introduction": "Summarize the introduction section of this research paper:",
60
- "Methods Used": "What are the main methods and methodologies used in this research:",
61
- "Literature Survey": "Summarize the literature review or related work from this paper:",
62
- "Limitations": "What are the limitations mentioned in this research:",
63
- "Contributions": "What are the main contributions of this research:",
64
- "Practical Implications": "What are the practical implications of this research:",
65
- "Objectives": "What are the main objectives of this research:",
66
- "Findings": "What are the key findings from this research:",
67
- "Future Research": "What future research directions are suggested in this paper:",
68
- "Dependent Variables": "What are the dependent variables studied in this research:",
69
- "Independent Variables": "What are the independent variables studied in this research:",
70
- "Dataset": "What dataset(s) were used in this research:",
71
- "Problem Statement": "What is the main problem statement or research question:",
72
- "Challenges": "What challenges were faced or addressed in this research:",
73
- "Applications": "What are the potential applications of this research:",
74
  }
75
 
76
- prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}" # Limit text to avoid token limits
 
 
 
77
  return call_perplexity_api(prompt)
78
 
79
 
@@ -336,84 +338,136 @@ def main():
336
  uploaded_files = st.file_uploader(
337
  "Upload multiple PDF files", type="pdf", accept_multiple_files=True
338
  )
339
-
340
- if uploaded_files:
341
- if st.button("Process Papers"):
342
- # Initialize progress bar
343
- progress_bar = st.progress(0)
344
- status_text = st.empty()
345
-
346
- # Initialize results dictionary
347
- results = []
348
-
349
- # Define categories
350
- categories = [
351
- "Summarized Abstract",
352
- "Results",
353
- "Summarized Introduction",
354
- "Methods Used",
355
- "Literature Survey",
356
- "Limitations",
357
- "Contributions",
358
- "Practical Implications",
359
- "Objectives",
360
- "Findings",
361
- "Future Research",
362
- "Dependent Variables",
363
- "Independent Variables",
364
- "Dataset",
365
- "Problem Statement",
366
- "Challenges",
367
- "Applications",
368
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
 
370
- # Process each file
371
- for i, file in enumerate(uploaded_files):
372
- status_text.text(f"Processing {file.name}...")
373
-
374
- # Extract text from PDF
375
- text = extract_text_from_pdf(file)
376
-
377
- # Initialize paper results
378
- paper_results = {"Filename": file.name}
379
-
380
- # Analyze each category
381
- for j, category in enumerate(categories):
382
- status_text.text(f"Processing {file.name} - {category}")
383
- paper_results[category] = analyze_paper(text, category)
384
-
385
- # Update progress
386
- progress = (i * len(categories) + j + 1) / (
387
- len(uploaded_files) * len(categories)
388
- )
389
- progress_bar.progress(progress)
390
-
391
- # Add small delay to avoid API rate limits
392
- time.sleep(1)
393
-
394
- results.append(paper_results)
395
-
396
- # Create DataFrame
397
- df = pd.DataFrame(results)
398
-
399
- # Convert DataFrame to CSV
400
- csv = df.to_csv(index=False)
401
-
402
- # Create download button
403
- st.download_button(
404
- label="Download Results as CSV",
405
- data=csv,
406
- file_name="research_papers_analysis.csv",
407
- mime="text/csv",
408
- )
409
-
410
- # Display results in the app
411
- st.subheader("Analysis Results")
412
- st.dataframe(df)
413
 
414
- status_text.text("Processing complete!")
415
- progress_bar.progress(1.0)
 
 
 
416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  with tabs[4]: # Paraphraser
418
  st.header("Paraphraser")
419
  text = st.text_area("Enter text to paraphrase")
 
54
  def analyze_paper(text: str, category: str) -> str:
55
  """Generate a prompt and get analysis for a specific category."""
56
  prompts = {
57
+ "Journal": "In which journal was this research published:",
58
+ "Journal Quality": "What is the quality or impact factor of the journal in which this research was published:",
59
+ "No Of Citations": "How many times has this research paper been cited:",
60
+ "Date Of Publications": "When was this research paper published:",
61
+ "Title": "What is the title of this research paper:",
62
+ "Abstract": "Provide a summarized version of the abstract of this paper:",
63
+ "Author Keywords": "What keywords were provided by the authors for this research paper:",
64
+ "Theories Used in The Paper": "What theories are utilized or referenced in this research paper:",
65
+ "Context Used In the Paper": "What is the specific context or scenario used in this research:",
66
+ "Methods and Material Used in This Paper": "What methods and materials are used in conducting this research:",
67
+ "Antecedents and Problems": "What antecedents and problems are identified in this research:",
68
+ "Decision and Frameworks To Solve the Problem": "What decision-making frameworks or solutions are proposed in this research:",
69
+ "Outcomes": "What are the outcomes or results of this research:",
70
+ "Study Findings": "What are the detailed findings of this research study:",
71
+ "Conclusions": "What conclusions are drawn from this research:",
72
+ "TSC ADO": "Provide details about the TSC ADO (Theory-Specific Constructs Applied in this research):"
 
73
  }
74
 
75
+ if category in prompts:
76
+ prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}" # Limit text to avoid token limits
77
+ else:
78
+ prompt = f"Analyze the following text for the category '{category}':\n\nPaper text: {text[:5000]}"
79
  return call_perplexity_api(prompt)
80
 
81
 
 
338
  uploaded_files = st.file_uploader(
339
  "Upload multiple PDF files", type="pdf", accept_multiple_files=True
340
  )
341
+ if 'categories' not in st.session_state:
342
+ st.session_state.categories = [
343
+ "Journal", "Journal Quality", "No Of Citations",
344
+ "Date Of Publications", "Title", "Abstract", "Author Keywords",
345
+ "Theories Used in The Paper", "Context Used In the Paper", "Methods and Material Used in This Paper",
346
+ "Antecedents and Problems", "Decision and Frameworks To Solve the Problem", "Outcomes",
347
+ "Study Findings", "Conclusions",
348
+ "TSC ADO"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  ]
350
+ # Display current categories
351
+ st.write("### Current Categories")
352
+ st.write(st.session_state.categories)
353
+
354
+ # Input to add new category
355
+ new_category = st.text_input("Add a new category")
356
+
357
+ if st.button("Add Category"):
358
+ if new_category.strip(): # Check if input is not empty
359
+ if new_category not in st.session_state.categories: # Avoid duplicates
360
+ st.session_state.categories.append(new_category)
361
+ st.success(f"Category '{new_category}' added!")
362
+ else:
363
+ st.warning(f"Category '{new_category}' already exists!")
364
+ else:
365
+ st.error("Category cannot be empty!") # Button to add the category
366
+
367
 
368
+ # Display updated categories
369
+ st.write("### Updated Categories")
370
+ st.write(st.session_state.categories)
371
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
 
373
+ if uploaded_files:
374
+ if st.button("Process Papers"):
375
+ # Initialize progress bar
376
+ progress_bar = st.progress(0)
377
+ status_text = st.empty()
378
 
379
+ # Initialize results dictionary
380
+ results = []
381
+
382
+ # Define categories
383
+ # categories = [
384
+ # "Summarized Abstract",
385
+ # "Results",
386
+ # "Summarized Introduction",
387
+ # "Methods Used",
388
+ # "Literature Survey",
389
+ # "Limitations",
390
+ # "Contributions",
391
+ # "Practical Implications",
392
+ # "Objectives",
393
+ # "Findings",
394
+ # "Future Research",
395
+ # "Dependent Variables",
396
+ # "Independent Variables",
397
+ # "Dataset",
398
+ # "Problem Statement",
399
+ # "Challenges",
400
+ # "Applications",
401
+ # ]
402
+ # # Display current categories
403
+ # st.write("### Current Categories")
404
+ # st.write(categories)
405
+
406
+ # # Input to add new category
407
+ # new_category = st.text_input("Add a new category")
408
+
409
+ # # Button to add the category
410
+ # if st.button("Add Category"):
411
+ # if new_category.strip(): # Check if input is not empty
412
+ # if new_category not in categories: # Avoid duplicates
413
+ # categories.append(new_category)
414
+ # st.success(f"Category '{new_category}' added!")
415
+ # else:
416
+ # st.warning(f"Category '{new_category}' already exists!")
417
+ # else:
418
+ # st.error("Category cannot be empty!")
419
+
420
+ # # Display updated categories
421
+ # st.write("### Updated Categories")
422
+ # st.write(categories)
423
+
424
+ # Process each file
425
+ for i, file in enumerate(uploaded_files):
426
+ status_text.text(f"Processing {file.name}...")
427
+
428
+ # Extract text from PDF
429
+ text = extract_text_from_pdf(file)
430
+
431
+ # Initialize paper results
432
+ paper_results = {"Filename": file.name}
433
+
434
+ # Analyze each category
435
+ for j, category in enumerate(st.session_state.categories):
436
+ status_text.text(f"Processing {file.name} - {category}")
437
+ paper_results[category] = analyze_paper(text, category)
438
+
439
+ # Update progress
440
+ progress = (i * len(st.session_state.categories) + j + 1) / (
441
+ len(uploaded_files) * len(st.session_state.categories)
442
+ )
443
+ progress_bar.progress(progress)
444
+
445
+ # Add small delay to avoid API rate limits
446
+ time.sleep(1)
447
+
448
+ results.append(paper_results)
449
+
450
+ # Create DataFrame
451
+ df = pd.DataFrame(results)
452
+
453
+ # Convert DataFrame to CSV
454
+ csv = df.to_csv(index=False)
455
+
456
+ # Create download button
457
+ st.download_button(
458
+ label="Download Results as CSV",
459
+ data=csv,
460
+ file_name="research_papers_analysis.csv",
461
+ mime="text/csv",
462
+ )
463
+
464
+ # Display results in the app
465
+ st.subheader("Analysis Results")
466
+ st.dataframe(df)
467
+
468
+ status_text.text("Processing complete!")
469
+ progress_bar.progress(1.0)
470
+
471
  with tabs[4]: # Paraphraser
472
  st.header("Paraphraser")
473
  text = st.text_area("Enter text to paraphrase")