import json
import os
import time
import xml.etree.ElementTree as ET
from typing import Dict, List

import pandas as pd
import PyPDF2
import requests
import streamlit as st
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter

load_dotenv()

PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
SAPLING_API_KEY = os.getenv("SAPLING_API_KEY")

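# A single helper funnels every Perplexity request so the model choice,
# temperature, and error handling live in one place.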
def call_perplexity_api(prompt: str) -> str:
    """Call Perplexity AI with a prompt; return the text response, or "" on failure."""
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }

    try:
        # timeout keeps the UI from hanging indefinitely if the API stalls
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=60
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        st.error(f"API Error: {str(e)}")
        return ""

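# Shared PyPDF2 text extraction, used by both the Chat with PDF tab and the
# batch Extract Data tab.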
def extract_text_from_pdf(pdf_file):
    """Extract text content from a PDF file."""
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None for image-only pages; guard against it
        text += (page.extract_text() or "") + "\n"
    return text

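# Prompt templates for the Extract Data tab: each CSV column maps to one
# prompt; user-added categories fall back to a generic analysis prompt.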
def analyze_paper(text: str, category: str) -> str:
    """Generate a prompt and get analysis for a specific category."""
    prompts = {
        "Journal": "In which journal was this research published:",
        "Journal Quality": "What is the quality or impact factor of the journal in which this research was published:",
        "No Of Citations": "How many times has this research paper been cited:",
        "Date Of Publications": "When was this research paper published:",
        "Title": "What is the title of this research paper:",
        "Abstract": "Provide a summarized version of the abstract of this paper:",
        "Author Keywords": "What keywords were provided by the authors for this research paper:",
        "Theories Used in The Paper": "What theories are utilized or referenced in this research paper:",
        "Context Used In the Paper": "What is the specific context or scenario used in this research:",
        "Methods and Material Used in This Paper": "What methods and materials are used in conducting this research:",
        "Antecedents and Problems": "What antecedents and problems are identified in this research:",
        "Decision and Frameworks To Solve the Problem": "What decision-making frameworks or solutions are proposed in this research:",
        "Outcomes": "What are the outcomes or results of this research:",
        "Study Findings": "What are the detailed findings of this research study:",
        "Conclusions": "What conclusions are drawn from this research:",
        "TSC ADO": "Provide details about the TSC ADO (Theory-Specific Constructs Applied in this research):",
    }

    # Only the first 5000 characters are sent, to keep the request size bounded.
    if category in prompts:
        prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}"
    else:
        prompt = f"Analyze the following text for the category '{category}':\n\nPaper text: {text[:5000]}"
    return call_perplexity_api(prompt)

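# Methods return OpenAI-style {"choices": [{"message": {"content": ...}}]}
# dicts (or an {"error": ...} dict), the shape the tab handlers in main() expect.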
class ResearchAssistant:
    def __init__(self, perplexity_key: str):
        self.perplexity_key = perplexity_key

    def chat_with_pdf(self, pdf_text: str, query: str) -> Dict:
        chunks = self._split_text(pdf_text)
        relevant_chunks = self._get_relevant_chunks(chunks, query)

        prompt = f"Context from PDF:\n\n{relevant_chunks}\n\nQuestion: {query}"
        response_text = call_perplexity_api(prompt)
        return {"choices": [{"message": {"content": response_text}}]}

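    # Literature review pipeline: search arXiv, summarize the hits, then ask
    # the model to organize them into a five-part review.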
    def generate_literature_review(self, topic: str) -> Dict:
        try:
            papers = self._search_arxiv(topic)
            if not papers:
                return {"error": "No papers found on the topic"}

            papers_summary = "\n\n".join(
                f"Paper: {p['title']}\nAuthors: {', '.join(p['authors'])}\nSummary: {p['summary']}"
                for p in papers
            )

            prompt = f"""Generate a comprehensive literature review on '{topic}' based on these papers:

{papers_summary}

Structure the review as follows:
1. Introduction and Background
2. Current Research Trends
3. Key Findings and Themes
4. Research Gaps
5. Future Directions"""

            response_text = call_perplexity_api(prompt)
            return {"choices": [{"message": {"content": response_text}}]}
        except Exception as e:
            return {"error": f"Literature review generation failed: {str(e)}"}

    def ai_writer(self, outline: str, references: List[str]) -> Dict:
        prompt = f"""Write a research paper following this structure:

Outline:
{outline}

References to incorporate:
{json.dumps(references)}

Instructions:
- Follow academic writing style
- Include appropriate citations
- Maintain logical flow
- Include introduction and conclusion"""

        response_text = call_perplexity_api(prompt)
        return {"choices": [{"message": {"content": response_text}}]}

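    # NOTE: refine_response is defined below but not currently called from main().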
    def refine_response(self, response: str, column: str) -> str:
        prompt = f"""Refine the following response to fit the '{column}' column in a research paper CSV format:

Response: {response}

Ensure the response is clear, concise, and fits the context of the column."""

        return call_perplexity_api(prompt)

    def paraphrase(self, text: str) -> Dict:
        prompt = f"""Paraphrase the following text while:
- Maintaining academic tone
- Preserving key meaning
- Improving clarity

Text: {text}"""

        response_text = call_perplexity_api(prompt)
        return {"choices": [{"message": {"content": response_text}}]}

    def generate_citation(self, paper_info: Dict, style: str = "APA") -> Dict:
        prompt = f"""Generate a {style} citation for:
Title: {paper_info['title']}
Authors: {', '.join(paper_info['authors'])}
Year: {paper_info['year']}

Follow exact {style} format guidelines."""

        response_text = call_perplexity_api(prompt)
        return {"citation": response_text}

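    # Unlike the other features, AI detection calls the Sapling endpoint
    # directly instead of going through call_perplexity_api.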
    def detect_ai_content(self, text: str) -> Dict:
        # The text is sent as-is to Sapling's AI-detection endpoint along with
        # the API key; no prompt engineering is involved.
        response = requests.post(
            "https://api.sapling.ai/api/v1/aidetect",
            json={"key": SAPLING_API_KEY, "text": text},
            timeout=30,
        )
        st.info(
            "A score from 0 to 1 will be returned, with 0 indicating the maximum "
            "confidence that the text is human-written, and 1 indicating the "
            "maximum confidence that the text is AI-generated."
        )

        if response.status_code == 200:
            return {"choices": [{"message": {"content": response.json()}}]}
        else:
            return {
                "error": f"Sapling API Error: {response.status_code} - {response.text}"
            }

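    # Retrieval is purely lexical: fixed-size chunks scored by word overlap
    # with the query (no embeddings involved).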
    def _split_text(self, text: str) -> List[str]:
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", ". ", " ", ""]
        )
        return splitter.split_text(text)

    def _get_relevant_chunks(self, chunks: List[str], query: str) -> str:
        # Score each chunk by how many query words it shares, then keep the
        # top three. Sorting on the score alone avoids comparing chunk strings
        # when scores tie.
        query_words = set(query.lower().split())
        scored_chunks = []

        for chunk in chunks:
            chunk_words = set(chunk.lower().split())
            score = len(query_words.intersection(chunk_words))
            scored_chunks.append((score, chunk))

        scored_chunks.sort(key=lambda pair: pair[0], reverse=True)
        return "\n\n".join(chunk for _, chunk in scored_chunks[:3])

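    # arXiv integration: a plain HTTP query against the public export API
    # (no API key required), parsed from the returned Atom XML feed.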
    def _search_arxiv(self, topic: str) -> List[Dict]:
        try:
            # the export API accepts "+AND+"-joined terms in search_query
            query = "+AND+".join(topic.split())
            url = f"http://export.arxiv.org/api/query?search_query=all:{query}&start=0&max_results=5"
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            return self._parse_arxiv_response(response.text)
        except Exception as e:
            print(f"arXiv search failed: {str(e)}")
            return []

    def _parse_arxiv_response(self, response_text: str) -> List[Dict]:
        try:
            ns = {"atom": "http://www.w3.org/2005/Atom"}
            root = ET.fromstring(response_text)
            papers = []
            for entry in root.findall("atom:entry", ns):
                paper = {
                    "id": entry.find("atom:id", ns).text,
                    "title": entry.find("atom:title", ns).text.strip(),
                    "summary": entry.find("atom:summary", ns).text.strip(),
                    "authors": [
                        author.find("atom:name", ns).text.strip()
                        for author in entry.findall("atom:author", ns)
                    ],
                    # keep only the YYYY-MM-DD part of the timestamp
                    "published": entry.find("atom:published", ns).text[:10],
                }
                papers.append(paper)
            return papers
        except Exception as e:
            print(f"arXiv response parsing failed: {str(e)}")
            return []

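# Streamlit entry point: one tab per feature, all sharing a single
# ResearchAssistant instance.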
def main():
    st.title("Research Copilot")

    if not PERPLEXITY_API_KEY:
        st.warning("Perplexity API key not found in environment variables.")
        return

    assistant = ResearchAssistant(PERPLEXITY_API_KEY)

    tabs = st.tabs(
        [
            "Chat with PDF",
            "Literature Review",
            "AI Writer",
            "Extract Data",
            "Paraphraser",
            "Citation Generator",
            "AI Detector",
        ]
    )

    with tabs[0]:
        st.header("Chat with PDF")

        col1, col2 = st.columns([3, 1])
        with col1:
            uploaded_file = st.file_uploader("Upload PDF", type="pdf", key="pdf_chat")
        with col2:
            if st.button("Clear PDF"):
                st.session_state.pop("pdf_text", None)
                st.rerun()

        if uploaded_file:
            if "pdf_text" not in st.session_state:
                with st.spinner("Processing PDF..."):
                    # reuse the shared extractor instead of re-implementing the loop
                    st.session_state.pdf_text = extract_text_from_pdf(uploaded_file)
                st.success("PDF processed successfully!")

            query = st.text_input("Ask a question about the PDF")
            if query:
                with st.spinner("Analyzing..."):
                    response = assistant.chat_with_pdf(st.session_state.pdf_text, query)
                    if "error" in response:
                        st.error(response["error"])
                    else:
                        st.write(response["choices"][0]["message"]["content"])

    with tabs[1]:
        st.header("Literature Review")
        topic = st.text_input("Enter research topic")
        if st.button("Generate Review") and topic:
            with st.spinner("Generating literature review..."):
                review = assistant.generate_literature_review(topic)
                if "error" in review:
                    st.error(review["error"])
                else:
                    st.write(review["choices"][0]["message"]["content"])

    with tabs[2]:
        st.header("AI Writer")
        outline = st.text_area("Enter paper outline")
        references = st.text_area("Enter references (one per line)")
        if st.button("Generate Paper") and outline:
            with st.spinner("Writing paper..."):
                # drop blank lines so empty strings are not passed as references
                ref_list = [r.strip() for r in references.split("\n") if r.strip()]
                paper = assistant.ai_writer(outline, ref_list)
                if "error" in paper:
                    st.error(paper["error"])
                else:
                    st.write(paper["choices"][0]["message"]["content"])

    with tabs[3]:
        st.header("Extract Data")

        uploaded_files = st.file_uploader(
            "Upload multiple PDF files", type="pdf", accept_multiple_files=True
        )
        if "categories" not in st.session_state:
            st.session_state.categories = [
                "Journal", "Journal Quality", "No Of Citations",
                "Date Of Publications", "Title", "Abstract", "Author Keywords",
                "Theories Used in The Paper", "Context Used In the Paper",
                "Methods and Material Used in This Paper",
                "Antecedents and Problems",
                "Decision and Frameworks To Solve the Problem", "Outcomes",
                "Study Findings", "Conclusions", "TSC ADO",
            ]

        st.write("### Current Categories")
        st.write(st.session_state.categories)

        new_category = st.text_input("Add a new category")

        if st.button("Add Category"):
            if new_category.strip():
                if new_category not in st.session_state.categories:
                    st.session_state.categories.append(new_category)
                    st.success(f"Category '{new_category}' added!")
                else:
                    st.warning(f"Category '{new_category}' already exists!")
            else:
                st.error("Category cannot be empty!")

        st.write("### Updated Categories")
        st.write(st.session_state.categories)

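        # Batch processing: every uploaded PDF is analyzed against every
        # category, so expect files x categories API calls with a one-second
        # pause between each.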
        if uploaded_files:
            if st.button("Process Papers"):
                progress_bar = st.progress(0)
                status_text = st.empty()

                results = []

                for i, file in enumerate(uploaded_files):
                    status_text.text(f"Processing {file.name}...")

                    text = extract_text_from_pdf(file)

                    paper_results = {"Filename": file.name}

                    for j, category in enumerate(st.session_state.categories):
                        status_text.text(f"Processing {file.name} - {category}")
                        paper_results[category] = analyze_paper(text, category)

                        progress = (i * len(st.session_state.categories) + j + 1) / (
                            len(uploaded_files) * len(st.session_state.categories)
                        )
                        progress_bar.progress(progress)

                        # crude rate limiting between API calls
                        time.sleep(1)

                    results.append(paper_results)

                df = pd.DataFrame(results)

                csv = df.to_csv(index=False)

                st.download_button(
                    label="Download Results as CSV",
                    data=csv,
                    file_name="research_papers_analysis.csv",
                    mime="text/csv",
                )

                st.subheader("Analysis Results")
                st.dataframe(df)

                status_text.text("Processing complete!")
                progress_bar.progress(1.0)

    with tabs[4]:
        st.header("Paraphraser")
        text = st.text_area("Enter text to paraphrase")
        if st.button("Paraphrase") and text:
            with st.spinner("Paraphrasing..."):
                result = assistant.paraphrase(text)
                if "error" in result:
                    st.error(result["error"])
                else:
                    st.write(result["choices"][0]["message"]["content"])

    with tabs[5]:
        st.header("Citation Generator")
        col1, col2 = st.columns(2)
        with col1:
            title = st.text_input("Paper Title")
            authors = st.text_input("Authors (comma-separated)")
        with col2:
            year = st.text_input("Year")
            style = st.selectbox("Citation Style", ["APA", "MLA", "Chicago"])

        if st.button("Generate Citation") and title:
            with st.spinner("Generating citation..."):
                citation = assistant.generate_citation(
                    {
                        "title": title,
                        "authors": [a.strip() for a in authors.split(",")],
                        "year": year,
                    },
                    style,
                )
                if "error" in citation:
                    st.error(citation["error"])
                else:
                    st.code(citation["citation"], language="text")

    with tabs[6]:
        st.header("AI Detector")
        text = st.text_area("Enter text to analyze")
        if st.button("Detect AI Content") and text:
            with st.spinner("Analyzing..."):
                result = assistant.detect_ai_content(text)
                if "error" in result:
                    st.error(result["error"])
                else:
                    st.write(result["choices"][0]["message"]["content"])

if __name__ == "__main__":
    main()
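# To run locally (assuming this file is saved as app.py and Streamlit is installed):
#   streamlit run app.py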