YashJD committed on
Commit
4eba3d2
·
1 Parent(s): e107ee4
Files changed (7) hide show
  1. extract.py +50 -85
  2. infranew.py +244 -4
  3. new_research_paper.py +269 -10
  4. papers_filtered_export.csv +5 -0
  5. research22.py +41 -8
  6. research_combine2.py +16 -12
  7. sciclone.py +146 -92
extract.py CHANGED
@@ -33,106 +33,71 @@ def call_perplexity_api(prompt: str) -> str:
33
  st.error(f"API Error: {str(e)}")
34
  return ""
35
 
36
- def extract_text_from_pdf(pdf_file):
37
- """Extract text content from a PDF file."""
38
- pdf_reader = PyPDF2.PdfReader(pdf_file)
39
- text = ""
40
- for page in pdf_reader.pages:
41
- text += page.extract_text() + "\n"
42
- return text
43
-
44
- def analyze_paper(text: str, category: str) -> str:
45
- """Generate a prompt and get analysis for a specific category."""
46
- prompts = {
47
- "Summarized Abstract": "Extract and summarize the abstract from this research paper:",
48
- "Results": "What are the main results and findings from this research paper:",
49
- "Summarized Introduction": "Summarize the introduction section of this research paper:",
50
- "Methods Used": "What are the main methods and methodologies used in this research:",
51
- "Literature Survey": "Summarize the literature review or related work from this paper:",
52
- "Limitations": "What are the limitations mentioned in this research:",
53
- "Contributions": "What are the main contributions of this research:",
54
- "Practical Implications": "What are the practical implications of this research:",
55
- "Objectives": "What are the main objectives of this research:",
56
- "Findings": "What are the key findings from this research:",
57
- "Future Research": "What future research directions are suggested in this paper:",
58
- "Dependent Variables": "What are the dependent variables studied in this research:",
59
- "Independent Variables": "What are the independent variables studied in this research:",
60
- "Dataset": "What dataset(s) were used in this research:",
61
- "Problem Statement": "What is the main problem statement or research question:",
62
- "Challenges": "What challenges were faced or addressed in this research:",
63
- "Applications": "What are the potential applications of this research:"
64
- }
65
-
66
- prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}" # Limit text to avoid token limits
67
- return call_perplexity_api(prompt)
68
 
69
  def main():
70
- st.title("Research Paper Analysis Tool")
71
-
72
  # File uploader
73
- uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)
74
-
75
- if uploaded_files:
76
- if st.button("Process Papers"):
77
  # Initialize progress bar
78
  progress_bar = st.progress(0)
79
  status_text = st.empty()
80
-
 
 
 
81
  # Initialize results dictionary
82
  results = []
83
-
84
- # Define categories
85
- categories = [
86
- "Summarized Abstract", "Results", "Summarized Introduction",
87
- "Methods Used", "Literature Survey", "Limitations",
88
- "Contributions", "Practical Implications", "Objectives",
89
- "Findings", "Future Research", "Dependent Variables",
90
- "Independent Variables", "Dataset", "Problem Statement",
91
- "Challenges", "Applications"
92
- ]
93
-
94
- # Process each file
95
- for i, file in enumerate(uploaded_files):
96
- status_text.text(f"Processing {file.name}...")
97
-
98
- # Extract text from PDF
99
- text = extract_text_from_pdf(file)
100
-
101
- # Initialize paper results
102
- paper_results = {"Filename": file.name}
103
-
104
- # Analyze each category
105
- for j, category in enumerate(categories):
106
- status_text.text(f"Processing {file.name} - {category}")
107
- paper_results[category] = analyze_paper(text, category)
108
-
109
- # Update progress
110
- progress = (i * len(categories) + j + 1) / (len(uploaded_files) * len(categories))
111
- progress_bar.progress(progress)
112
-
113
- # Add small delay to avoid API rate limits
114
- time.sleep(1)
115
-
116
- results.append(paper_results)
117
-
118
- # Create DataFrame
119
- df = pd.DataFrame(results)
120
-
121
  # Convert DataFrame to CSV
122
- csv = df.to_csv(index=False)
123
-
124
  # Create download button
125
  st.download_button(
126
  label="Download Results as CSV",
127
  data=csv,
128
- file_name="research_papers_analysis.csv",
129
- mime="text/csv"
130
  )
131
-
132
- # Display results in the app
133
  st.subheader("Analysis Results")
134
- st.dataframe(df)
135
-
 
 
 
 
 
 
 
 
 
 
136
  status_text.text("Processing complete!")
137
  progress_bar.progress(1.0)
138
 
 
33
  st.error(f"API Error: {str(e)}")
34
  return ""
35
 
36
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  def main():
39
+ st.title("Research Corpus Synthesis Tool")
40
+
41
  # File uploader
42
+ uploaded_file = st.file_uploader("Upload CSV file", type="csv")
43
+
44
+ if uploaded_file:
45
+ if st.button("Process CSV"):
46
  # Initialize progress bar
47
  progress_bar = st.progress(0)
48
  status_text = st.empty()
49
+
50
+ # Read CSV file into DataFrame
51
+ df = pd.read_csv(uploaded_file)
52
+
53
  # Initialize results dictionary
54
  results = []
55
+
56
+ # Process each column
57
+ for i, column in enumerate(df.columns):
58
+ status_text.text(f"Processing column: {column}")
59
+
60
+ # Extract text from column
61
+ text = " ".join(df[column].astype(str).tolist())
62
+
63
+ # Generate prompt
64
+ prompt = f"You are a Professional Researcher and Analyser with 10 yrs of Experience.Find details and Elaborate on Top Trends,Theories,Methods,FrameWorks with this topic ({column}):\n\n{text[:5000]}" # Limit text to avoid token limits
65
+
66
+ # Call Perplexity API
67
+ result = call_perplexity_api(prompt)
68
+ results.append({"Column": column, "Result": result})
69
+
70
+ # Update progress
71
+ progress = (i + 1) / len(df.columns)
72
+ progress_bar.progress(progress)
73
+
74
+ # Create DataFrame from results
75
+ results_df = pd.DataFrame(results)
76
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  # Convert DataFrame to CSV
78
+ csv = results_df.to_csv(index=False)
79
+
80
  # Create download button
81
  st.download_button(
82
  label="Download Results as CSV",
83
  data=csv,
84
+ file_name="column_trends_analysis.csv",
85
+ mime="text/csv",
86
  )
87
+
 
88
  st.subheader("Analysis Results")
89
+ styled_df = results_df.style.set_properties(**{
90
+ 'background-color': '#f9f9f9',
91
+ 'color': '#333',
92
+ 'border-color': 'black',
93
+ 'border-width': '1px',
94
+ 'border-style': 'solid',
95
+ 'font-family': 'Arial, sans-serif',
96
+ 'font-size': '14px',
97
+ 'text-align': 'left',
98
+ 'padding': '10px'
99
+ })
100
+ st.dataframe(styled_df)
101
  status_text.text("Processing complete!")
102
  progress_bar.progress(1.0)
103
 
infranew.py CHANGED
@@ -1,3 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import networkx as nx
@@ -6,8 +237,12 @@ from bokeh.plotting import figure, from_networkx
6
  import requests
7
  import json
8
  import google.generativeai as genai
 
 
 
 
9
 
10
- PERPLEXITY_API_KEY = "pplx-REDACTED"
11
  PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
12
 
13
 
@@ -132,7 +367,7 @@ def search_papers(topic: str, num_papers: int) -> list:
132
  return []
133
 
134
 
135
- import os
136
 
137
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
138
  GEMINI_API_URL = "https://api.openai.com/v1/engines/davinci-codex/completions"
@@ -160,7 +395,7 @@ def call_gemini_api(prompt: str) -> str:
160
 
161
 
162
  def generate_gaps_paragraph(gaps):
163
- prompt = f"Generate a brief paragraph about the gaps in the research based on the following gaps: {', '.join(gaps)}"
164
  return call_gemini_api(prompt)
165
 
166
 
@@ -193,7 +428,12 @@ def generate_insights(G, topic):
193
  if gaps_paragraph:
194
  st.write("### Gaps in Research")
195
  st.write(gaps_paragraph)
196
-
 
 
 
 
 
197
 
198
  def main():
199
  st.title("Advanced Interactive Knowledge Graph")
 
1
+ # import streamlit as st
2
+ # import pandas as pd
3
+ # import networkx as nx
4
+ # from bokeh.models import HoverTool
5
+ # from bokeh.plotting import figure, from_networkx
6
+ # import requests
7
+ # import json
8
+ # import google.generativeai as genai
9
+
10
+ # PERPLEXITY_API_KEY = "pplx-REDACTED"
11
+ # PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
12
+
13
+
14
+ # def extract_edges(keywords):
15
+ # keywords = [kw.strip() for kw in keywords.split(",")]
16
+ # edges = [
17
+ # (keywords[i], keywords[j])
18
+ # for i in range(len(keywords))
19
+ # for j in range(i + 1, len(keywords))
20
+ # ]
21
+ # return edges
22
+
23
+
24
+ # def create_knowledge_graph(data):
25
+ # G = nx.Graph()
26
+
27
+ # for _, row in data.iterrows():
28
+ # words = []
29
+ # for col in data.columns:
30
+ # if pd.notnull(row[col]):
31
+ # # Convert to string and handle numeric values
32
+ # cell_value = str(row[col]).strip()
33
+ # if cell_value:
34
+ # words.extend(cell_value.split())
35
+
36
+ # if words:
37
+ # edges = extract_edges(",".join(words))
38
+ # G.add_edges_from(edges)
39
+
40
+ # for word in words:
41
+ # word = word.strip()
42
+ # if word not in G:
43
+ # G.add_node(word, title=word, value=len(word))
44
+
45
+ # return G
46
+
47
+
48
+ # def render_graph_bokeh(G):
49
+ # plot = figure(
50
+ # title="Interactive Knowledge Graph",
51
+ # x_range=(-1.5, 1.5),
52
+ # y_range=(-1.5, 1.5),
53
+ # tools="pan,wheel_zoom,box_zoom,reset,tap",
54
+ # active_scroll="wheel_zoom",
55
+ # )
56
+ # plot.add_tools(HoverTool(tooltips="@index"))
57
+
58
+ # graph_renderer = from_networkx(G, nx.spring_layout, scale=1, center=(0, 0))
59
+
60
+ # graph_renderer.node_renderer.glyph.size = 10
61
+ # graph_renderer.node_renderer.glyph.fill_color = "blue"
62
+ # graph_renderer.node_renderer.glyph.line_color = "black"
63
+
64
+ # graph_renderer.edge_renderer.glyph.line_width = 1
65
+ # graph_renderer.edge_renderer.glyph.line_color = "gray"
66
+
67
+ # plot.renderers.append(graph_renderer)
68
+
69
+ # return plot
70
+
71
+
72
+ # import re
73
+
74
+
75
+ # def search_papers(topic: str, num_papers: int) -> list:
76
+ # headers = {
77
+ # "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
78
+ # "Content-Type": "application/json",
79
+ # }
80
+
81
+ # prompt = f"""Find {num_papers} recent research papers about {topic}.
82
+ # Return ONLY a valid JSON array with the following structure for each paper:
83
+ # [
84
+ # {{
85
+ # "Title": "paper title",
86
+ # "Abstract": "abstract text",
87
+ # "Keywords": "key terms"
88
+ # }}
89
+ # ]"""
90
+
91
+ # payload = {
92
+ # "model": "llama-3.1-sonar-small-128k-chat",
93
+ # "messages": [
94
+ # {
95
+ # "role": "system",
96
+ # "content": "You are a research paper analyzer that returns valid JSON arrays.",
97
+ # },
98
+ # {"role": "user", "content": prompt},
99
+ # ],
100
+ # "temperature": 0.1,
101
+ # }
102
+
103
+ # try:
104
+ # response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
105
+ # response.raise_for_status()
106
+ # content = response.json()["choices"][0]["message"]["content"]
107
+
108
+ # # Clean response to ensure valid JSON
109
+ # content = content.strip()
110
+ # if not content.startswith("["):
111
+ # content = content[content.find("[") :]
112
+ # if not content.endswith("]"):
113
+ # content = content[: content.rfind("]") + 1]
114
+
115
+ # # Remove any trailing commas before closing brackets
116
+ # content = re.sub(r",\s*]", "]", content)
117
+ # content = re.sub(r",\s*}", "}", content)
118
+
119
+ # papers = json.loads(content)
120
+ # if not isinstance(papers, list):
121
+ # raise ValueError("Response is not a JSON array")
122
+ # return papers
123
+ # except requests.exceptions.RequestException as e:
124
+ # st.error(f"API Request Error: {str(e)}")
125
+ # return []
126
+ # except json.JSONDecodeError as e:
127
+ # st.error(f"Invalid JSON response: {str(e)}")
128
+ # st.error(f"Response content: {response.text}")
129
+ # return []
130
+ # except ValueError as e:
131
+ # st.error(f"Error: {str(e)}")
132
+ # return []
133
+
134
+
135
+ # import os
136
+
137
+ # GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
138
+ # GEMINI_API_URL = "https://api.openai.com/v1/engines/davinci-codex/completions"
139
+
140
+
141
+ # def call_gemini_api(prompt: str) -> str:
142
+ # headers = {
143
+ # "Authorization": f"Bearer {GEMINI_API_KEY}",
144
+ # "Content-Type": "application/json",
145
+ # }
146
+
147
+ # payload = {
148
+ # "prompt": prompt,
149
+ # "max_tokens": 150,
150
+ # "temperature": 0.7,
151
+ # }
152
+
153
+ # try:
154
+ # model = genai.GenerativeModel("gemini-pro")
155
+ # response = model.generate_content(prompt)
156
+ # return response.text
157
+ # except Exception as e:
158
+ # st.error(f"Gemini API Error: {str(e)}")
159
+ # return ""
160
+
161
+
162
+ # def generate_gaps_paragraph(gaps):
163
+ # prompt = f"Generate a brief paragraph about the gaps in the research based on the following gaps: {', '.join(gaps)}"
164
+ # return call_gemini_api(prompt)
165
+
166
+
167
+ # def generate_insights(G, topic):
168
+ # papers = search_papers(topic, 5)
169
+ # if papers:
170
+ # st.write("### Research Insights from Perplexity API")
171
+ # for paper in papers:
172
+ # st.write(f"**Title:** {paper['Title']}")
173
+ # st.write(f"**Abstract:** {paper['Abstract']}")
174
+ # st.write(f"**Keywords:** {paper['Keywords']}")
175
+ # st.write("---")
176
+
177
+ # nodes = list(G.nodes(data=True))
178
+ # insights = {}
179
+ # insights["Strong Points"] = [
180
+ # n for n, d in nodes if G.degree(n) > len(G.nodes) * 0.1
181
+ # ]
182
+ # insights["Weak Points"] = [n for n, d in nodes if G.degree(n) < len(G.nodes) * 0.05]
183
+ # insights["Gaps"] = [n for n, d in nodes if len(list(nx.neighbors(G, n))) == 0]
184
+
185
+ # st.write("### Graph-Based Insights")
186
+ # st.write("**Strong Points:**", insights["Strong Points"])
187
+ # st.write("**Weak Points:**", insights["Weak Points"])
188
+ # st.write("**Gaps:**", insights["Gaps"])
189
+
190
+ # if insights["Gaps"]:
191
+ # with st.spinner("Generating insights about gaps..."):
192
+ # gaps_paragraph = generate_gaps_paragraph(insights["Gaps"])
193
+ # if gaps_paragraph:
194
+ # st.write("### Gaps in Research")
195
+ # st.write(gaps_paragraph)
196
+
197
+
198
+ # def main():
199
+ # st.title("Advanced Interactive Knowledge Graph")
200
+ # st.write(
201
+ # "Upload a CSV file to generate a fully interactive and insightful knowledge graph."
202
+ # )
203
+
204
+ # uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
205
+
206
+ # if uploaded_file is not None:
207
+ # try:
208
+ # data = pd.read_csv(uploaded_file)
209
+ # st.write("Preview of the uploaded data:")
210
+ # st.dataframe(data.head())
211
+
212
+ # G = create_knowledge_graph(data)
213
+
214
+ # st.write("Generated Knowledge Graph:")
215
+ # plot = render_graph_bokeh(G)
216
+ # st.bokeh_chart(plot, use_container_width=True)
217
+
218
+ # topic = st.text_input(
219
+ # "Enter a topic for additional insights:", "knowledge graphs"
220
+ # )
221
+ # if topic:
222
+ # generate_insights(G, topic)
223
+
224
+ # except Exception as e:
225
+ # st.error(f"An error occurred while processing the file: {e}")
226
+ # else:
227
+ # st.info("Please upload a CSV file to get started.")
228
+
229
+
230
+ # if __name__ == "__main__":
231
+ # main()
232
  import streamlit as st
233
  import pandas as pd
234
  import networkx as nx
 
237
  import requests
238
  import json
239
  import google.generativeai as genai
240
+ from dotenv import load_dotenv
241
+ import os
242
+
243
+ load_dotenv()
244
 
245
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
246
  PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
247
 
248
 
 
367
  return []
368
 
369
 
370
+
371
 
372
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
373
  GEMINI_API_URL = "https://api.openai.com/v1/engines/davinci-codex/completions"
 
395
 
396
 
397
  def generate_gaps_paragraph(gaps):
398
+ prompt = f"Generate a brief paragraph about the gaps in the research based on the following gaps provide evidence-based(how did you deduce this) recommendations for new research paper ideas based on these gaps, Justify your recommendations with evidence from the data it analyzes. Give atleast 10 new research paper ideas based on the gaps and 500 words gap analysis,Give a different table for new research ideas with evidence that why did you reccommend it, In the new Table Containing Research Paper Ideas there should be 4 columns - New Research Paper Idea/Title,Evidence , Methodology(How will we approach the ideo new research paper), Evidence Of How Methodology was obtained: {', '.join(gaps)}"
399
  return call_gemini_api(prompt)
400
 
401
 
 
428
  if gaps_paragraph:
429
  st.write("### Gaps in Research")
430
  st.write(gaps_paragraph)
431
+ st.download_button(
432
+ label="Download Gaps Analysis as Text",
433
+ data=gaps_paragraph,
434
+ file_name="gaps_analysis.txt",
435
+ mime="text/plain",
436
+ )
437
 
438
  def main():
439
  st.title("Advanced Interactive Knowledge Graph")
new_research_paper.py CHANGED
@@ -1,3 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import requests
@@ -33,7 +136,75 @@ def call_perplexity_api(prompt: str) -> str:
33
  return ""
34
 
35
 
36
- def generate_research_paper(df: pd.DataFrame) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  """
38
  For each column in the DataFrame, generate a research paper section (200-500 words)
39
  that addresses the data in that column. Return a dict mapping column -> text.
@@ -43,9 +214,14 @@ def generate_research_paper(df: pd.DataFrame) -> dict:
43
  # Convert all non-null rows in the column to strings and join them for context
44
  col_values = df[col].dropna().astype(str).tolist()
45
  # We'll truncate if this is huge
 
46
  sample_text = " | ".join(col_values[:50]) # limit to first 50 rows for brevity
47
  prompt = f"""
48
- Topic: {col}
 
 
 
 
49
  Data Sample: {sample_text}
50
 
51
  Generate a professional research paper section for the above column.
@@ -58,20 +234,31 @@ def generate_research_paper(df: pd.DataFrame) -> dict:
58
  return paper_sections
59
 
60
 
61
- def format_paper(paper_dict: dict) -> str:
62
  """
63
  Format the generated paper into a Markdown string.
64
- Each column name is used as a heading, and the text is placed under it.
 
65
  """
66
- md_text = "# Generated Research Paper\n\n"
 
 
67
  for col, content in paper_dict.items():
68
- md_text += f"## {col}\n{content}\n\n"
69
  return md_text
70
 
71
 
72
- def main():
73
  st.title("Corpus-based Research Paper Generator")
74
 
 
 
 
 
 
 
 
 
75
  uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
76
  if uploaded_file:
77
  df = pd.read_csv(uploaded_file)
@@ -81,9 +268,9 @@ def main():
81
  if st.button("Generate Research Paper"):
82
  st.info("Generating paper based on the columns of your corpus...")
83
  with st.spinner("Calling Perplexity AI..."):
84
- paper = generate_research_paper(df)
85
  if paper:
86
- formatted_paper = format_paper(paper)
87
  st.success("Research Paper Generated Successfully!")
88
  st.write(formatted_paper)
89
 
@@ -98,6 +285,78 @@ def main():
98
  "Paper generation failed. Please check Perplexity API key."
99
  )
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  if __name__ == "__main__":
103
- main()
 
1
+ # import streamlit as st
2
+ # import pandas as pd
3
+ # import requests
4
+ # import json
5
+ # import os
6
+ # from dotenv import load_dotenv
7
+
8
+ # # Load environment variables
9
+ # load_dotenv()
10
+ # PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
11
+ # PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
12
+
13
+
14
+ # def call_perplexity_api(prompt: str) -> str:
15
+ # """Call Perplexity AI with a prompt, return the text response if successful."""
16
+ # headers = {
17
+ # "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
18
+ # "Content-Type": "application/json",
19
+ # }
20
+
21
+ # payload = {
22
+ # "model": "llama-3.1-sonar-small-128k-chat",
23
+ # "messages": [{"role": "user", "content": prompt}],
24
+ # "temperature": 0.3,
25
+ # }
26
+
27
+ # try:
28
+ # response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
29
+ # response.raise_for_status()
30
+ # return response.json()["choices"][0]["message"]["content"]
31
+ # except Exception as e:
32
+ # st.error(f"API Error: {str(e)}")
33
+ # return ""
34
+
35
+
36
+ # def generate_research_paper(df: pd.DataFrame) -> dict:
37
+ # """
38
+ # For each column in the DataFrame, generate a research paper section (200-500 words)
39
+ # that addresses the data in that column. Return a dict mapping column -> text.
40
+ # """
41
+ # paper_sections = {}
42
+ # for col in df.columns:
43
+ # # Convert all non-null rows in the column to strings and join them for context
44
+ # col_values = df[col].dropna().astype(str).tolist()
45
+ # # We'll truncate if this is huge
46
+ # sample_text = " | ".join(col_values[:50]) # limit to first 50 rows for brevity
47
+ # prompt = f"""
48
+ # Topic: {col}
49
+ # Data Sample: {sample_text}
50
+
51
+ # Generate a professional research paper section for the above column.
52
+ # The section should be at least 100 words and at most 150 words,
53
+ # focusing on key insights, challenges, and potential research angles.
54
+ # Integrate the data samples as context for the content.
55
+ # """
56
+ # section_text = call_perplexity_api(prompt)
57
+ # paper_sections[col] = section_text.strip() if section_text else ""
58
+ # return paper_sections
59
+
60
+
61
+ # def format_paper(paper_dict: dict) -> str:
62
+ # """
63
+ # Format the generated paper into a Markdown string.
64
+ # Each column name is used as a heading, and the text is placed under it.
65
+ # """
66
+ # md_text = "# Generated Research Paper\n\n"
67
+ # for col, content in paper_dict.items():
68
+ # md_text += f"## {col}\n{content}\n\n"
69
+ # return md_text
70
+
71
+
72
+ # def main():
73
+ # st.title("Corpus-based Research Paper Generator")
74
+
75
+ # uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
76
+ # if uploaded_file:
77
+ # df = pd.read_csv(uploaded_file)
78
+ # st.write("### Preview of Uploaded Data")
79
+ # st.dataframe(df.head())
80
+
81
+ # if st.button("Generate Research Paper"):
82
+ # st.info("Generating paper based on the columns of your corpus...")
83
+ # with st.spinner("Calling Perplexity AI..."):
84
+ # paper = generate_research_paper(df)
85
+ # if paper:
86
+ # formatted_paper = format_paper(paper)
87
+ # st.success("Research Paper Generated Successfully!")
88
+ # st.write(formatted_paper)
89
+
90
+ # st.download_button(
91
+ # label="Download Paper as Markdown",
92
+ # data=formatted_paper,
93
+ # file_name="research_paper.md",
94
+ # mime="text/markdown",
95
+ # )
96
+ # else:
97
+ # st.error(
98
+ # "Paper generation failed. Please check Perplexity API key."
99
+ # )
100
+
101
+
102
+ # if __name__ == "__main__":
103
+ # main()
104
  import streamlit as st
105
  import pandas as pd
106
  import requests
 
136
  return ""
137
 
138
 
139
+ # def generate_research_paper(df: pd.DataFrame) -> dict:
140
+ # """
141
+ # For each column in the DataFrame, generate a research paper section (200-500 words)
142
+ # that addresses the data in that column. Return a dict mapping column -> text.
143
+ # """
144
+ # paper_sections = {}
145
+ # for col in df.columns:
146
+ # # Convert all non-null rows in the column to strings and join them for context
147
+ # col_values = df[col].dropna().astype(str).tolist()
148
+ # # We'll truncate if this is huge
149
+ # sample_text = " | ".join(col_values[:50]) # limit to first 50 rows for brevity
150
+ # prompt = f"""
151
+ # Topic: {col}
152
+ # Data Sample: {sample_text}
153
+
154
+ # Generate a professional research paper section for the above column.
155
+ # The section should be at least 100 words and at most 150 words,
156
+ # focusing on key insights, challenges, and potential research angles.
157
+ # Integrate the data samples as context for the content.
158
+ # """
159
+ # section_text = call_perplexity_api(prompt)
160
+ # paper_sections[col] = section_text.strip() if section_text else ""
161
+ # return paper_sections
162
+
163
+
164
+ # def format_paper(paper_dict: dict) -> str:
165
+ # """
166
+ # Format the generated paper into a Markdown string.
167
+ # Each column name is used as a heading, and the text is placed under it.
168
+ # """
169
+ # md_text = "# Generated Research Paper\n\n"
170
+ # for col, content in paper_dict.items():
171
+ # md_text += f"## {col}\n{content}\n\n"
172
+ # return md_text
173
+
174
+
175
+ # def main():
176
+ # st.title("Corpus-based Research Paper Generator")
177
+
178
+ # uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
179
+ # if uploaded_file:
180
+ # df = pd.read_csv(uploaded_file)
181
+ # st.write("### Preview of Uploaded Data")
182
+ # st.dataframe(df.head())
183
+
184
+ # if st.button("Generate Research Paper"):
185
+ # st.info("Generating paper based on the columns of your corpus...")
186
+ # with st.spinner("Calling Perplexity AI..."):
187
+ # paper = generate_research_paper(df)
188
+ # if paper:
189
+ # formatted_paper = format_paper(paper)
190
+ # st.success("Research Paper Generated Successfully!")
191
+ # st.write(formatted_paper)
192
+
193
+ # st.download_button(
194
+ # label="Download Paper as Markdown",
195
+ # data=formatted_paper,
196
+ # file_name="research_paper.md",
197
+ # mime="text/markdown",
198
+ # )
199
+ # else:
200
+ # st.error(
201
+ # "Paper generation failed. Please check Perplexity API key."
202
+ # )
203
+
204
+
205
+ # if __name__ == "__main__":
206
+ # main()
207
+ #def generate_research_paper(df: pd.DataFrame, gaps_analysis: str, topic: str, journal: str, format: str) -> dict:
208
  """
209
  For each column in the DataFrame, generate a research paper section (200-500 words)
210
  that addresses the data in that column. Return a dict mapping column -> text.
 
214
  # Convert all non-null rows in the column to strings and join them for context
215
  col_values = df[col].dropna().astype(str).tolist()
216
  # We'll truncate if this is huge
217
+ print(col)
218
  sample_text = " | ".join(col_values[:50]) # limit to first 50 rows for brevity
219
  prompt = f"""
220
+ Topic: {topic}
221
+ Journal/Conference: {journal}
222
+ Format: {format}
223
+ Gaps Analysis: {gaps_analysis}
224
+ Column: {col}
225
  Data Sample: {sample_text}
226
 
227
  Generate a professional research paper section for the above column.
 
234
  return paper_sections
235
 
236
 
237
+ #def format_paper(paper_dict: dict, topic: str, journal: str, format: str) -> str:
238
  """
239
  Format the generated paper into a Markdown string.
240
+ Add the topic, journal, and format as the main title, each column name as a heading,
241
+ and the corresponding text as paragraph content.
242
  """
243
+ md_text = f"# Research Paper on: {topic}\n\n"
244
+ md_text += f"## Journal/Conference: {journal}\n\n"
245
+ md_text += f"## Format: {format}\n\n"
246
  for col, content in paper_dict.items():
247
+ md_text += f"### {col}\n{content}\n\n"
248
  return md_text
249
 
250
 
251
+ #def main():
252
  st.title("Corpus-based Research Paper Generator")
253
 
254
+ topic_input = st.text_input("Enter the topic for the research paper:")
255
+ journal_input = st.text_input("Enter the Journal/Conference aimed to publish:")
256
+ format_input = st.text_input("Enter the format of the research paper:")
257
+ gaps_analysis_file = st.file_uploader("Upload Gaps Analysis (.txt file)", type="txt")
258
+ gaps_analysis = ""
259
+ if gaps_analysis_file:
260
+ gaps_analysis = gaps_analysis_file.getvalue().decode("utf-8")
261
+
262
  uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
263
  if uploaded_file:
264
  df = pd.read_csv(uploaded_file)
 
268
  if st.button("Generate Research Paper"):
269
  st.info("Generating paper based on the columns of your corpus...")
270
  with st.spinner("Calling Perplexity AI..."):
271
+ paper = generate_research_paper(df, gaps_analysis, topic_input, journal_input, format_input)
272
  if paper:
273
+ formatted_paper = format_paper(paper, topic_input, journal_input, format_input)
274
  st.success("Research Paper Generated Successfully!")
275
  st.write(formatted_paper)
276
 
 
285
  "Paper generation failed. Please check Perplexity API key."
286
  )
287
 
288
+ def generate_research_paper(df: pd.DataFrame, gaps_analysis: str, topic: str, journal: str, format: str) -> str:
289
+ """
290
+ Generate a research paper based on the entire DataFrame, the topic, journal, and format.
291
+ """
292
+ # Convert the entire DataFrame to a string
293
+ df_string = df.to_string(index=False)
294
+
295
+ # Create the prompt
296
+ prompt = f"""
297
+ Topic: {topic}
298
+ Journal/Conference: {journal}
299
+ Format: {format}
300
+ Gaps Analysis: {gaps_analysis}
301
+ Data:
302
+ {df_string}
303
+
304
+ Generate a professional research paper based on the above data.
305
+ The paper should be well-structured, focusing on key insights, challenges, and potential research angles.
306
+ Use the Gaps Analysis to identify areas for improvement and future work and fill the gaps in the new paper.
307
+ Use the data as a reference to support your arguments, dont directly copy the data.
308
+ Ensure the paper is formatted according to the specified journal/conference format.
309
+ """
310
+
311
+ # Call the Perplexity API
312
+ paper_text = call_perplexity_api(prompt)
313
+ return paper_text.strip() if paper_text else ""
314
+
315
+ def format_paper(paper_text: str, topic: str, journal: str, format: str) -> str:
316
+ """
317
+ Format the generated paper into a Markdown string.
318
+ Add the topic, journal, and format as the main title, and the paper text as content.
319
+ """
320
+ md_text = f"# Research Paper on: {topic}\n\n"
321
+ md_text += paper_text
322
+ return md_text
323
+
324
+ def main():
325
+ st.title("Corpus-based Research Paper Generator")
326
+
327
+ topic_input = st.text_input("Enter the topic for the research paper:")
328
+ journal_input = st.text_input("Enter the Journal/Conference aimed to publish:")
329
+ format_input = st.text_input("Enter the format of the research paper:")
330
+ gaps_analysis_file = st.file_uploader("Upload Gaps Analysis (.txt file)", type="txt")
331
+ gaps_analysis = ""
332
+ if gaps_analysis_file:
333
+ gaps_analysis = gaps_analysis_file.getvalue().decode("utf-8")
334
+
335
+ uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
336
+ if uploaded_file:
337
+ df = pd.read_csv(uploaded_file)
338
+ st.write("### Preview of Uploaded Data")
339
+ st.dataframe(df.head())
340
+
341
+ if st.button("Generate Research Paper"):
342
+ st.info("Generating paper based on the columns of your corpus...")
343
+ with st.spinner("Calling Perplexity AI..."):
344
+ paper_text = generate_research_paper(df, gaps_analysis, topic_input, journal_input, format_input)
345
+ if paper_text:
346
+ formatted_paper = format_paper(paper_text, topic_input, journal_input, format_input)
347
+ st.success("Research Paper Generated Successfully!")
348
+ st.write(formatted_paper)
349
+
350
+ st.download_button(
351
+ label="Download Paper as Markdown",
352
+ data=formatted_paper,
353
+ file_name="research_paper.md",
354
+ mime="text/markdown",
355
+ )
356
+ else:
357
+ st.error(
358
+ "Paper generation failed. Please check Perplexity API key."
359
+ )
360
 
361
  if __name__ == "__main__":
362
+ main()
papers_filtered_export.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ _id,Title,Publication,Journal_Conference,Abstract,Keywords,Author,Date_of_Publication,Intro,Literature_Review,Introduction,Body,Methodology,Participants,Survey Instrument,Data Collection,Data Analysis,Results and Discussion,Conclusion,References
2
+ 6783960b5c01fc5ec3a02bcc,Enhancing Student Engagement in GenAI Education: A FlipClassroom Approach,Journal of Educational Technology,Journal of Educational Technology,"This study explores the integration of Generative AI (GenAI) into a flipped classroom setting to enhance student engagement in educational technology courses. The authors designed an experiment where students used GenAI tools to create interactive learning materials before class, which were then discussed in class. The results showed significant improvements in student participation and understanding.","GenAI, flipped classroom, educational technology, student engagement","John Doe, Jane Smith",2023-02-15,The integration of GenAI into educational settings has been gaining attention due to its potential to enhance learning experiences.,"Previous studies have shown that flipped classrooms can improve student engagement, but the addition of GenAI tools has not been extensively explored.",This study aims to investigate whether incorporating GenAI into a flipped classroom setting can further enhance student engagement.,The study involved 100 students divided into two groups: one using traditional methods and the other using GenAI tools.,"Students in the GenAI group created interactive materials using AI tools before class, while those in the traditional group used standard textbooks.",100 students from a university in the United States,A survey was administered at the end of the semester to assess student engagement and understanding.,Data was collected through surveys and class participation observations.,Descriptive statistics were used to analyze survey responses and class participation data.,The results showed that students in the GenAI group had higher participation rates and better understanding of the material compared to those in the traditional group.,The integration of GenAI into a flipped classroom setting can significantly enhance student engagement and understanding in educational 
technology courses.,"{'Author': 'Doe, J.', 'Year': '2023', 'Title': 'Enhancing Student Engagement in GenAI Education'}, {'Author': 'Smith, J.', 'Year': '2022', 'Title': 'Flipped Classrooms and Educational Technology'}"
3
+ 6783960b5c01fc5ec3a02bcd,Assessing the Impact of GenAI on Student Outcomes in a Flipped Classroom Environment,Journal of Educational Innovation,Journal of Educational Innovation,"This study evaluates the impact of integrating Generative AI (GenAI) into a flipped classroom environment on student outcomes. The authors conducted an experiment where students used GenAI tools to create personalized learning materials, which were then discussed in class. The results indicate that GenAI integration led to improved student performance and increased student satisfaction.","GenAI, flipped classroom, student outcomes, educational innovation","Jane Doe, John Smith",2023-05-01,The integration of GenAI into educational settings has been gaining attention due to its potential to personalize learning experiences.,"Previous studies have shown that flipped classrooms can improve student outcomes, but the addition of GenAI tools has not been extensively explored.",This study aims to investigate whether incorporating GenAI into a flipped classroom setting can improve student performance and satisfaction.,The study involved 150 students divided into two groups: one using traditional methods and the other using GenAI tools.,"Students in the GenAI group created personalized materials using AI tools before class, while those in the traditional group used standard textbooks.",150 students from a university in the United States,A survey was administered at the end of the semester to assess student satisfaction and performance.,Data was collected through surveys and class participation observations.,Descriptive statistics were used to analyze survey responses and class participation data.,The results showed that students in the GenAI group had higher performance rates and better satisfaction compared to those in the traditional group.,The integration of GenAI into a flipped classroom setting can significantly improve student performance and satisfaction in educational technology 
courses.,"{'Author': 'Doe, J.', 'Year': '2023', 'Title': 'Assessing the Impact of GenAI on Student Outcomes'}, {'Author': 'Smith, J.', 'Year': '2022', 'Title': 'Flipped Classrooms and Educational Innovation'}"
4
+ 678396345c01fc5ec3a02bd0,Exploring the Impact of GenAI on the FlipClassroom Model: A Case Study,Journal of Educational Technology,Journal of Educational Technology,"This study investigates the integration of Generative AI (GenAI) into the flipped classroom model. We examine how GenAI can enhance student engagement, improve learning outcomes, and reduce teacher workload.","GenAI, flipped classroom, educational technology, student engagement, learning outcomes","John Doe, Jane Smith",2023-02-15,"The flipped classroom model has gained popularity in recent years due to its potential to improve student learning outcomes. However, it also presents challenges such as increased teacher workload and limited student engagement.",A comprehensive review of existing literature on the flipped classroom model and its integration with GenAI is provided.,This study aims to explore the impact of GenAI on the flipped classroom model by examining its effects on student engagement and learning outcomes.,The study employed a mixed-methods approach combining both qualitative and quantitative data collection methods.,A total of 100 students participated in this study. 
They were divided into two groups: one using traditional teaching methods and the other using GenAI-enhanced materials.,100 students from a local university,A survey was administered to gather data on student perceptions and experiences.,"Data was collected through surveys, interviews, and observation.",Qualitative data was analyzed using thematic analysis while quantitative data was analyzed using statistical methods.,The results show significant improvements in student engagement and learning outcomes when GenAI is integrated into the flipped classroom model.,The integration of GenAI into the flipped classroom model can significantly enhance student engagement and improve learning outcomes.,"{'Author': 'Doe, J.', 'Year': '2023', 'Title': 'GenAI in Education'}, {'Author': 'Smith, J.', 'Year': '2022', 'Title': 'Flipped Classroom Models'}"
5
+ 678396345c01fc5ec3a02bd1,Enhancing the Flipped Classroom with Generative AI: A Pilot Study,International Journal of Educational Research,International Journal of Educational Research,This pilot study explores the potential of Generative AI (GenAI) to enhance the flipped classroom model by automating content creation and providing personalized learning experiences.,"GenAI, flipped classroom, educational technology, personalized learning","Jane Doe, John Smith",2023-05-01,The flipped classroom model has shown promise in improving student learning outcomes but faces challenges related to content creation and personalization.,A review of existing literature on GenAI applications in education is provided.,This study aims to pilot-test the integration of GenAI into the flipped classroom model focusing on its potential for automating content creation and providing personalized learning experiences.,The study employed an experimental design where one group received GenAI-generated materials while another group received traditional materials.,A total of 50 students participated in this pilot study. They were divided into two groups based on their exposure to GenAI-generated materials.,50 students from a local high school,A survey was administered post-intervention to gather feedback from participants.,Data was collected through surveys and observation.,Qualitative data was analyzed using thematic analysis while quantitative data was analyzed using statistical methods.,The results indicate that GenAI can effectively automate content creation and provide personalized learning experiences within the flipped classroom model.,The integration of GenAI into the flipped classroom model shows promise for enhancing student engagement and improving learning outcomes through automation and personalization.,"{'Author': 'Doe, J.', 'Year': '2023', 'Title': 'GenAI in Education'}, {'Author': 'Smith, J.', 'Year': '2022', 'Title': 'Flipped Classroom Models'}"
research22.py CHANGED
@@ -10,6 +10,7 @@ from dotenv import load_dotenv
10
  import os
11
  import json
12
  import re
 
13
 
14
  # --------------------------------------------------------------------------------
15
  # 1. Environment Setup
@@ -20,6 +21,8 @@ MONGODB_URI = os.getenv(
20
  "MONGODB_UR",
21
  "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
22
  )
 
 
23
  # Gemini
24
  GEMINI_KEY = os.getenv("GEMINI_KEY", "AIzaSyCFIvntck54HOCS5pxxiy9wpr5HJN3r02I")
25
 
@@ -27,6 +30,36 @@ GEMINI_KEY = os.getenv("GEMINI_KEY", "AIzaSyCFIvntck54HOCS5pxxiy9wpr5HJN3r02I")
27
  genai.configure(api_key=GEMINI_KEY)
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  # --------------------------------------------------------------------------------
31
  # 2. Database Connection
32
  # --------------------------------------------------------------------------------
@@ -67,13 +100,13 @@ def extract_text_from_pdf(pdf_file) -> str:
67
  # --------------------------------------------------------------------------------
68
  # 4. Gemini Response Helper
69
  # --------------------------------------------------------------------------------
70
- def get_gemini_response(prompt: str) -> str:
71
  """
72
  Sends a prompt to Google's Gemini model and returns the response text.
73
  Adjust this function as needed for your generative AI usage.
74
  """
75
  try:
76
- model = genai.GenerativeModel("gemini-pro")
77
  response = model.generate_content(prompt)
78
  return response.text
79
  except Exception as e:
@@ -112,7 +145,7 @@ def extract_basic_info(text: str) -> Dict[str, str]:
112
  Author: ...
113
  Date_of_Publication: ...
114
  """
115
- response = get_gemini_response(prompt)
116
  if not response:
117
  return {}
118
  info = {}
@@ -153,7 +186,7 @@ def extract_content_sections(text: str) -> Dict[str, str]:
153
  Future_Scope: <text>
154
  Theory: <text>
155
  """
156
- response = get_gemini_response(prompt)
157
  if not response:
158
  return {}
159
  sections = {}
@@ -196,7 +229,7 @@ def extract_variables(text: str) -> Dict[str, Any]:
196
 
197
  Paper text: {text}
198
  """
199
- response = get_gemini_response(prompt)
200
  if not response:
201
  return {}
202
  variables = {}
@@ -394,7 +427,7 @@ def extract_paper_fields(text: str, paper_type: str) -> Dict[str, Any]:
394
  Paper text:
395
  {text}
396
 
397
- Return them in this JSON format strictly, with no extra text:
398
  [
399
  {{
400
  {", ".join([f'"{attr}": "value"' for attr in selected_attrs])}
@@ -403,7 +436,7 @@ def extract_paper_fields(text: str, paper_type: str) -> Dict[str, Any]:
403
  """
404
 
405
  try:
406
- response = get_gemini_response(prompt)
407
  if not response:
408
  st.error("No response from Gemini.")
409
  return {}
@@ -451,7 +484,7 @@ def process_paper(text: str, paper_type: str):
451
  the corresponding MongoDB collection.
452
  """
453
  db = create_db_connection()
454
- if not db:
455
  return
456
 
457
  # Determine collection name
 
10
  import os
11
  import json
12
  import re
13
+ import requests
14
 
15
  # --------------------------------------------------------------------------------
16
  # 1. Environment Setup
 
21
  "MONGODB_UR",
22
  "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
23
  )
24
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
25
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
26
  # Gemini
27
  GEMINI_KEY = os.getenv("GEMINI_KEY", "AIzaSyCFIvntck54HOCS5pxxiy9wpr5HJN3r02I")
28
 
 
30
  genai.configure(api_key=GEMINI_KEY)
31
 
32
 
33
+ def call_perplexity_api(prompt: str) -> str:
34
+ """
35
+ Call Perplexity AI with a prompt, returning the text response if successful.
36
+ """
37
+ headers = {
38
+ "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
39
+ "Content-Type": "application/json",
40
+ }
41
+ payload = {
42
+ "model": "llama-3.1-sonar-small-128k-chat",
43
+ "messages": [{"role": "user", "content": prompt}],
44
+ "temperature": 0.3,
45
+ }
46
+
47
+ try:
48
+ response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
49
+ response.raise_for_status()
50
+ return response.json()["choices"][0]["message"]["content"]
51
+ except Exception as e:
52
+ st.error(f"Perplexity API Error: {str(e)}")
53
+ return ""
54
+
55
+
56
+ def get_perplexity_response(prompt: str) -> str:
57
+ """
58
+ Wrapper that calls call_perplexity_api, mimicking the old gemini function name signature.
59
+ """
60
+ return call_perplexity_api(prompt)
61
+
62
+
63
  # --------------------------------------------------------------------------------
64
  # 2. Database Connection
65
  # --------------------------------------------------------------------------------
 
100
  # --------------------------------------------------------------------------------
101
  # 4. Gemini Response Helper
102
  # --------------------------------------------------------------------------------
103
+ def get_perplexity_response(prompt: str) -> str:
104
  """
105
  Sends a prompt to Google's Gemini model and returns the response text.
106
  Adjust this function as needed for your generative AI usage.
107
  """
108
  try:
109
+ model = genai.GenerativeModel("gemini-1.5-pro")
110
  response = model.generate_content(prompt)
111
  return response.text
112
  except Exception as e:
 
145
  Author: ...
146
  Date_of_Publication: ...
147
  """
148
+ response = get_perplexity_response(prompt)
149
  if not response:
150
  return {}
151
  info = {}
 
186
  Future_Scope: <text>
187
  Theory: <text>
188
  """
189
+ response = get_perplexity_response(prompt)
190
  if not response:
191
  return {}
192
  sections = {}
 
229
 
230
  Paper text: {text}
231
  """
232
+ response = get_perplexity_response(prompt)
233
  if not response:
234
  return {}
235
  variables = {}
 
427
  Paper text:
428
  {text}
429
 
430
+ Return them in this JSON format strictly, with no extra text, and strictly don't start the JSON with a newline or markdown and don't have Unterminated string:
431
  [
432
  {{
433
  {", ".join([f'"{attr}": "value"' for attr in selected_attrs])}
 
436
  """
437
 
438
  try:
439
+ response = get_perplexity_response(prompt)
440
  if not response:
441
  st.error("No response from Gemini.")
442
  return {}
 
484
  the corresponding MongoDB collection.
485
  """
486
  db = create_db_connection()
487
+ if db is None:
488
  return
489
 
490
  # Determine collection name
research_combine2.py CHANGED
@@ -193,12 +193,16 @@ def display_research_assistant_dashboard():
193
  "Knowledge Graph",
194
  "Cosine Similarity",
195
  "Paper Generator",
196
- "Paper from Topic",
197
  "Download Entire Corpus",
198
  "Research Copilot",
199
- "Research Paper Analysis Tool",
200
  ],
201
  )
 
 
 
 
202
 
203
  if option == "Search Papers":
204
  st.subheader("Search and Store Papers")
@@ -236,13 +240,14 @@ def display_research_assistant_dashboard():
236
  st.warning("Please enter a research topic")
237
 
238
  # Add MongoDB connection status
239
- if st.sidebar.button("Check Database Connection"):
240
- try:
241
- client.admin.command("ping")
242
- print(MONGODB_URI)
243
- st.sidebar.success("Connected to MongoDB")
244
- except Exception as e:
245
- st.sidebar.error(f"MongoDB Connection Error: {str(e)}")
 
246
  elif option == "Single Keyword Search":
247
  keywords_database_download.main()
248
  elif option == "Multiple Keywords Search":
@@ -253,13 +258,12 @@ def display_research_assistant_dashboard():
253
  loldude.main()
254
  elif option == "Paper Generator":
255
  new_research_paper.main()
256
- elif option == "Paper from Topic":
257
- research3.main()
258
  elif option == "Download Entire Corpus":
259
  entire_download.main()
260
  elif option == "Research Copilot":
261
  sciclone.main()
262
- elif option == "Research Paper Analysis Tool":
263
  extract.main()
264
  else:
265
  research22.main()
 
193
  "Knowledge Graph",
194
  "Cosine Similarity",
195
  "Paper Generator",
196
+
197
  "Download Entire Corpus",
198
  "Research Copilot",
199
+ "Research Corpus Synthesis Tool",
200
  ],
201
  )
202
+ if st.sidebar.button("Logout", use_container_width=True):
203
+ for key in st.session_state.keys():
204
+ del st.session_state[key]
205
+ st.rerun()
206
 
207
  if option == "Search Papers":
208
  st.subheader("Search and Store Papers")
 
240
  st.warning("Please enter a research topic")
241
 
242
  # Add MongoDB connection status
243
+ # if st.sidebar.button("Check Database Connection"):
244
+ # try:
245
+ # client.admin.command("ping")
246
+ # print(MONGODB_URI)
247
+ # st.sidebar.success("Connected to MongoDB")
248
+ # except Exception as e:
249
+ # st.sidebar.error(f"MongoDB Connection Error: {str(e)}")
250
+
251
  elif option == "Single Keyword Search":
252
  keywords_database_download.main()
253
  elif option == "Multiple Keywords Search":
 
258
  loldude.main()
259
  elif option == "Paper Generator":
260
  new_research_paper.main()
261
+
 
262
  elif option == "Download Entire Corpus":
263
  entire_download.main()
264
  elif option == "Research Copilot":
265
  sciclone.main()
266
+ elif option == "Research Corpus Synthesis Tool":
267
  extract.main()
268
  else:
269
  research22.main()
sciclone.py CHANGED
@@ -54,26 +54,28 @@ def extract_text_from_pdf(pdf_file):
54
  def analyze_paper(text: str, category: str) -> str:
55
  """Generate a prompt and get analysis for a specific category."""
56
  prompts = {
57
- "Summarized Abstract": "Extract and summarize the abstract from this research paper:",
58
- "Results": "What are the main results and findings from this research paper:",
59
- "Summarized Introduction": "Summarize the introduction section of this research paper:",
60
- "Methods Used": "What are the main methods and methodologies used in this research:",
61
- "Literature Survey": "Summarize the literature review or related work from this paper:",
62
- "Limitations": "What are the limitations mentioned in this research:",
63
- "Contributions": "What are the main contributions of this research:",
64
- "Practical Implications": "What are the practical implications of this research:",
65
- "Objectives": "What are the main objectives of this research:",
66
- "Findings": "What are the key findings from this research:",
67
- "Future Research": "What future research directions are suggested in this paper:",
68
- "Dependent Variables": "What are the dependent variables studied in this research:",
69
- "Independent Variables": "What are the independent variables studied in this research:",
70
- "Dataset": "What dataset(s) were used in this research:",
71
- "Problem Statement": "What is the main problem statement or research question:",
72
- "Challenges": "What challenges were faced or addressed in this research:",
73
- "Applications": "What are the potential applications of this research:",
74
  }
75
 
76
- prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}" # Limit text to avoid token limits
 
 
 
77
  return call_perplexity_api(prompt)
78
 
79
 
@@ -336,84 +338,136 @@ def main():
336
  uploaded_files = st.file_uploader(
337
  "Upload multiple PDF files", type="pdf", accept_multiple_files=True
338
  )
339
-
340
- if uploaded_files:
341
- if st.button("Process Papers"):
342
- # Initialize progress bar
343
- progress_bar = st.progress(0)
344
- status_text = st.empty()
345
-
346
- # Initialize results dictionary
347
- results = []
348
-
349
- # Define categories
350
- categories = [
351
- "Summarized Abstract",
352
- "Results",
353
- "Summarized Introduction",
354
- "Methods Used",
355
- "Literature Survey",
356
- "Limitations",
357
- "Contributions",
358
- "Practical Implications",
359
- "Objectives",
360
- "Findings",
361
- "Future Research",
362
- "Dependent Variables",
363
- "Independent Variables",
364
- "Dataset",
365
- "Problem Statement",
366
- "Challenges",
367
- "Applications",
368
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
 
370
- # Process each file
371
- for i, file in enumerate(uploaded_files):
372
- status_text.text(f"Processing {file.name}...")
373
-
374
- # Extract text from PDF
375
- text = extract_text_from_pdf(file)
376
-
377
- # Initialize paper results
378
- paper_results = {"Filename": file.name}
379
-
380
- # Analyze each category
381
- for j, category in enumerate(categories):
382
- status_text.text(f"Processing {file.name} - {category}")
383
- paper_results[category] = analyze_paper(text, category)
384
-
385
- # Update progress
386
- progress = (i * len(categories) + j + 1) / (
387
- len(uploaded_files) * len(categories)
388
- )
389
- progress_bar.progress(progress)
390
-
391
- # Add small delay to avoid API rate limits
392
- time.sleep(1)
393
-
394
- results.append(paper_results)
395
-
396
- # Create DataFrame
397
- df = pd.DataFrame(results)
398
-
399
- # Convert DataFrame to CSV
400
- csv = df.to_csv(index=False)
401
-
402
- # Create download button
403
- st.download_button(
404
- label="Download Results as CSV",
405
- data=csv,
406
- file_name="research_papers_analysis.csv",
407
- mime="text/csv",
408
- )
409
-
410
- # Display results in the app
411
- st.subheader("Analysis Results")
412
- st.dataframe(df)
413
 
414
- status_text.text("Processing complete!")
415
- progress_bar.progress(1.0)
 
 
 
416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  with tabs[4]: # Paraphraser
418
  st.header("Paraphraser")
419
  text = st.text_area("Enter text to paraphrase")
 
54
  def analyze_paper(text: str, category: str) -> str:
55
  """Generate a prompt and get analysis for a specific category."""
56
  prompts = {
57
+ "Journal": "In which journal was this research published:",
58
+ "Journal Quality": "What is the quality or impact factor of the journal in which this research was published:",
59
+ "No Of Citations": "How many times has this research paper been cited:",
60
+ "Date Of Publications": "When was this research paper published:",
61
+ "Title": "What is the title of this research paper:",
62
+ "Abstract": "Provide a summarized version of the abstract of this paper:",
63
+ "Author Keywords": "What keywords were provided by the authors for this research paper:",
64
+ "Theories Used in The Paper": "What theories are utilized or referenced in this research paper:",
65
+ "Context Used In the Paper": "What is the specific context or scenario used in this research:",
66
+ "Methods and Material Used in This Paper": "What methods and materials are used in conducting this research:",
67
+ "Antecedents and Problems": "What antecedents and problems are identified in this research:",
68
+ "Decision and Frameworks To Solve the Problem": "What decision-making frameworks or solutions are proposed in this research:",
69
+ "Outcomes": "What are the outcomes or results of this research:",
70
+ "Study Findings": "What are the detailed findings of this research study:",
71
+ "Conclusions": "What conclusions are drawn from this research:",
72
+ "TSC ADO": "Provide details about the TSC ADO (Theory-Specific Constructs Applied in this research):"
 
73
  }
74
 
75
+ if category in prompts:
76
+ prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}" # Limit text to avoid token limits
77
+ else:
78
+ prompt = f"Analyze the following text for the category '{category}':\n\nPaper text: {text[:5000]}"
79
  return call_perplexity_api(prompt)
80
 
81
 
 
338
  uploaded_files = st.file_uploader(
339
  "Upload multiple PDF files", type="pdf", accept_multiple_files=True
340
  )
341
+ if 'categories' not in st.session_state:
342
+ st.session_state.categories = [
343
+ "Journal", "Journal Quality", "No Of Citations",
344
+ "Date Of Publications", "Title", "Abstract", "Author Keywords",
345
+ "Theories Used in The Paper", "Context Used In the Paper", "Methods and Material Used in This Paper",
346
+ "Antecedents and Problems", "Decision and Frameworks To Solve the Problem", "Outcomes",
347
+ "Study Findings", "Conclusions",
348
+ "TSC ADO"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  ]
350
+ # Display current categories
351
+ st.write("### Current Categories")
352
+ st.write(st.session_state.categories)
353
+
354
+ # Input to add new category
355
+ new_category = st.text_input("Add a new category")
356
+
357
+ if st.button("Add Category"):
358
+ if new_category.strip(): # Check if input is not empty
359
+ if new_category not in st.session_state.categories: # Avoid duplicates
360
+ st.session_state.categories.append(new_category)
361
+ st.success(f"Category '{new_category}' added!")
362
+ else:
363
+ st.warning(f"Category '{new_category}' already exists!")
364
+ else:
365
+ st.error("Category cannot be empty!") # Button to add the category
366
+
367
 
368
+ # Display updated categories
369
+ st.write("### Updated Categories")
370
+ st.write(st.session_state.categories)
371
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
 
373
+ if uploaded_files:
374
+ if st.button("Process Papers"):
375
+ # Initialize progress bar
376
+ progress_bar = st.progress(0)
377
+ status_text = st.empty()
378
 
379
+ # Initialize results dictionary
380
+ results = []
381
+
382
+ # Define categories
383
+ # categories = [
384
+ # "Summarized Abstract",
385
+ # "Results",
386
+ # "Summarized Introduction",
387
+ # "Methods Used",
388
+ # "Literature Survey",
389
+ # "Limitations",
390
+ # "Contributions",
391
+ # "Practical Implications",
392
+ # "Objectives",
393
+ # "Findings",
394
+ # "Future Research",
395
+ # "Dependent Variables",
396
+ # "Independent Variables",
397
+ # "Dataset",
398
+ # "Problem Statement",
399
+ # "Challenges",
400
+ # "Applications",
401
+ # ]
402
+ # # Display current categories
403
+ # st.write("### Current Categories")
404
+ # st.write(categories)
405
+
406
+ # # Input to add new category
407
+ # new_category = st.text_input("Add a new category")
408
+
409
+ # # Button to add the category
410
+ # if st.button("Add Category"):
411
+ # if new_category.strip(): # Check if input is not empty
412
+ # if new_category not in categories: # Avoid duplicates
413
+ # categories.append(new_category)
414
+ # st.success(f"Category '{new_category}' added!")
415
+ # else:
416
+ # st.warning(f"Category '{new_category}' already exists!")
417
+ # else:
418
+ # st.error("Category cannot be empty!")
419
+
420
+ # # Display updated categories
421
+ # st.write("### Updated Categories")
422
+ # st.write(categories)
423
+
424
+ # Process each file
425
+ for i, file in enumerate(uploaded_files):
426
+ status_text.text(f"Processing {file.name}...")
427
+
428
+ # Extract text from PDF
429
+ text = extract_text_from_pdf(file)
430
+
431
+ # Initialize paper results
432
+ paper_results = {"Filename": file.name}
433
+
434
+ # Analyze each category
435
+ for j, category in enumerate(st.session_state.categories):
436
+ status_text.text(f"Processing {file.name} - {category}")
437
+ paper_results[category] = analyze_paper(text, category)
438
+
439
+ # Update progress
440
+ progress = (i * len(st.session_state.categories) + j + 1) / (
441
+ len(uploaded_files) * len(st.session_state.categories)
442
+ )
443
+ progress_bar.progress(progress)
444
+
445
+ # Add small delay to avoid API rate limits
446
+ time.sleep(1)
447
+
448
+ results.append(paper_results)
449
+
450
+ # Create DataFrame
451
+ df = pd.DataFrame(results)
452
+
453
+ # Convert DataFrame to CSV
454
+ csv = df.to_csv(index=False)
455
+
456
+ # Create download button
457
+ st.download_button(
458
+ label="Download Results as CSV",
459
+ data=csv,
460
+ file_name="research_papers_analysis.csv",
461
+ mime="text/csv",
462
+ )
463
+
464
+ # Display results in the app
465
+ st.subheader("Analysis Results")
466
+ st.dataframe(df)
467
+
468
+ status_text.text("Processing complete!")
469
+ progress_bar.progress(1.0)
470
+
471
  with tabs[4]: # Paraphraser
472
  st.header("Paraphraser")
473
  text = st.text_area("Enter text to paraphrase")