"""Streamlit "Research Copilot" application.

Provides a tabbed UI for chatting with a PDF, generating literature reviews
from arXiv results, drafting papers, extracting structured data from papers,
paraphrasing, generating citations and detecting AI-written text.  Text
generation goes through the Perplexity chat-completions API; AI detection
goes through the Sapling API.
"""

import json
import logging
import os
import re
import time
import xml.etree.ElementTree as ET
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import Optional, Dict, List
from urllib.parse import quote

import pandas as pd
import PyPDF2
import requests
import streamlit as st
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter

logger = logging.getLogger(__name__)

# Load environment variables
load_dotenv()
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
SAPLING_API_KEY = os.getenv("SAPLING_API_KEY")
SAPLING_API_URL = "https://api.sapling.ai/api/v1/aidetect"
ARXIV_API_URL = "https://export.arxiv.org/api/query"
ATOM_NS = "{http://www.w3.org/2005/Atom}"

# Without a timeout a stalled connection would block the Streamlit script
# run forever.
REQUEST_TIMEOUT_SECONDS = 60

# Only the first MAX_PAPER_CHARS characters of a paper are sent for analysis,
# to avoid token limits.
MAX_PAPER_CHARS = 5000

# Instruction sent to the model for each extractable category.  The insertion
# order is also the column order of the "Extract Data" CSV.
CATEGORY_PROMPTS = {
    "Summarized Abstract": "Extract and summarize the abstract from this research paper:",
    "Results": "What are the main results and findings from this research paper:",
    "Summarized Introduction": "Summarize the introduction section of this research paper:",
    "Methods Used": "What are the main methods and methodologies used in this research:",
    "Literature Survey": "Summarize the literature review or related work from this paper:",
    "Limitations": "What are the limitations mentioned in this research:",
    "Contributions": "What are the main contributions of this research:",
    "Practical Implications": "What are the practical implications of this research:",
    "Objectives": "What are the main objectives of this research:",
    "Findings": "What are the key findings from this research:",
    "Future Research": "What future research directions are suggested in this paper:",
    "Dependent Variables": "What are the dependent variables studied in this research:",
    "Independent Variables": "What are the independent variables studied in this research:",
    "Dataset": "What dataset(s) were used in this research:",
    "Problem Statement": "What is the main problem statement or research question:",
    "Challenges": "What challenges were faced or addressed in this research:",
    "Applications": "What are the potential applications of this research:",
}


def call_perplexity_api(prompt: str, timeout: float = REQUEST_TIMEOUT_SECONDS) -> str:
    """Send *prompt* to Perplexity AI and return the text of the reply.

    Args:
        prompt: The user message to send.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The content of the first choice's message, or ``""`` when the request
        fails or the response does not have the expected shape.  On failure
        the error is also shown in the Streamlit UI via ``st.error``.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }
    try:
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except (
        requests.RequestException,
        KeyError,
        IndexError,
        TypeError,
        ValueError,
    ) as e:
        # Network/HTTP failures and malformed response bodies are both
        # reported to the user; callers treat "" as "no answer".
        st.error(f"API Error: {str(e)}")
        return ""


def extract_text_from_pdf(pdf_file) -> str:
    """Extract text content from a PDF file.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts (the app passes
            Streamlit uploaded-file objects).

    Returns:
        The text of every page, each followed by a newline.  Pages for which
        no text could be extracted contribute an empty line.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # ``extract_text`` may yield None for pages without a text layer; treat
    # that as empty text instead of failing on ``None + "\n"``.
    return "".join((page.extract_text() or "") + "\n" for page in pdf_reader.pages)


def analyze_paper(text: str, category: str) -> str:
    """Generate a prompt and get analysis for a specific category.

    Args:
        text: Full text of the paper; only the first ``MAX_PAPER_CHARS``
            characters are sent.
        category: One of the keys of ``CATEGORY_PROMPTS``.

    Returns:
        The model's answer, or ``""`` if the API call failed.

    Raises:
        KeyError: If *category* is not a known category.
    """
    try:
        instruction = CATEGORY_PROMPTS[category]
    except KeyError:
        raise KeyError(f"Unknown analysis category: {category!r}") from None
    # Limit text to avoid token limits
    prompt = f"{instruction}\n\nPaper text: {text[:MAX_PAPER_CHARS]}"
    return call_perplexity_api(prompt)


def _as_chat_response(content) -> Dict:
    """Wrap *content* in the chat-completion-shaped dict the UI code reads."""
    return {"choices": [{"message": {"content": content}}]}


class ResearchAssistant:
    """Bundle of research helpers backed by Perplexity, arXiv and Sapling.

    Methods that feed the UI return either a chat-completion-shaped dict
    (``{"choices": [{"message": {"content": ...}}]}``) or ``{"error": msg}``.
    """

    def __init__(self, perplexity_key: str):
        """Store the Perplexity key on the instance.

        Note: requests are authenticated by ``call_perplexity_api`` using the
        module-level ``PERPLEXITY_API_KEY``, not this attribute.
        """
        self.perplexity_key = perplexity_key

    def chat_with_pdf(self, pdf_text: str, query: str) -> Dict:
        """Answer *query* using the chunks of *pdf_text* most relevant to it."""
        chunks = self._split_text(pdf_text)
        relevant_chunks = self._get_relevant_chunks(chunks, query)
        prompt = f"Context from PDF:\n\n{relevant_chunks}\n\nQuestion: {query}"
        return _as_chat_response(call_perplexity_api(prompt))

    def generate_literature_review(self, topic: str) -> Dict:
        """Generate a literature review on *topic* from arXiv search results.

        Returns:
            A chat-shaped dict with the review, or ``{"error": ...}`` when no
            papers are found or generation fails.
        """
        try:
            # Search arXiv for papers
            papers = self._search_arxiv(topic)
            if not papers:
                return {"error": "No papers found on the topic"}

            # Format paper information
            papers_summary = "\n\n".join(
                f"Paper: {p['title']}\nAuthors: {', '.join(p['authors'])}\nSummary: {p['summary']}"
                for p in papers
            )
            prompt = (
                f"Generate a comprehensive literature review on '{topic}'. "
                f"Based on these papers: {papers_summary} "
                "Structure the review as follows: "
                "1. Introduction and Background "
                "2. Current Research Trends "
                "3. Key Findings and Themes "
                "4. Research Gaps "
                "5. Future Directions"
            )
            return _as_chat_response(call_perplexity_api(prompt))
        except Exception as e:
            # Deliberate catch-all at the UI boundary: any failure is shown
            # to the user as an error message instead of crashing the tab.
            return {"error": f"Literature review generation failed: {str(e)}"}

    def ai_writer(self, outline: str, references: List[str]) -> Dict:
        """Draft a research paper from *outline*, citing *references*."""
        prompt = (
            "Write a research paper following this structure: "
            f"Outline: {outline} "
            f"References to incorporate: {json.dumps(references)} "
            "Instructions: "
            "- Follow academic writing style "
            "- Include appropriate citations "
            "- Maintain logical flow "
            "- Include introduction and conclusion"
        )
        return _as_chat_response(call_perplexity_api(prompt))

    def refine_response(self, response: str, column: str) -> str:
        """Ask the model to tighten *response* so it fits CSV column *column*.

        Returns:
            The refined text, or ``""`` if the API call failed.
        """
        prompt = (
            f"Refine the following response to fit the '{column}' column "
            "in a research paper CSV format: "
            f"Response: {response} "
            "Ensure the response is clear, concise, and fits the context of the column."
        )
        return call_perplexity_api(prompt)

    def paraphrase(self, text: str) -> Dict:
        """Paraphrase *text* in an academic tone."""
        prompt = (
            "Paraphrase the following text while: "
            "- Maintaining academic tone "
            "- Preserving key meaning "
            "- Improving clarity "
            f"Text: {text}"
        )
        return _as_chat_response(call_perplexity_api(prompt))

    def generate_citation(self, paper_info: Dict, style: str = "APA") -> Dict:
        """Generate a citation for a paper.

        Args:
            paper_info: Dict with ``"title"``, ``"authors"`` (list of str)
                and ``"year"`` keys.
            style: Citation style name inserted into the prompt.

        Returns:
            ``{"citation": text}`` where *text* is the model's reply.
        """
        prompt = (
            f"Generate a {style} citation for: "
            f"Title: {paper_info['title']} "
            f"Authors: {', '.join(paper_info['authors'])} "
            f"Year: {paper_info['year']} "
            f"Follow exact {style} format guidelines."
        )
        return {"citation": call_perplexity_api(prompt)}

    def detect_ai_content(self, text: str) -> Dict:
        """Score *text* for AI authorship using the Sapling AI detector.

        Returns:
            A chat-shaped dict whose content is the decoded Sapling JSON
            response, or ``{"error": ...}`` if the key is missing, the request
            fails, or Sapling answers with a non-200 status.
        """
        if not SAPLING_API_KEY:
            return {"error": "Sapling API key not found in environment variables."}
        try:
            response = requests.post(
                SAPLING_API_URL,
                json={"key": SAPLING_API_KEY, "text": text},
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException as e:
            # Surface connection problems through the same error channel the
            # UI already handles, rather than raising into the Streamlit run.
            return {"error": f"Sapling API Error: {str(e)}"}

        st.info(
            "A score from 0 to 1 will be returned, with 0 indicating the maximum "
            "confidence that the text is human-written, and 1 indicating the "
            "maximum confidence that the text is AI-generated."
        )
        if response.status_code != 200:
            return {
                "error": f"Sapling API Error: {response.status_code} - {response.text}"
            }
        try:
            return _as_chat_response(response.json())
        except ValueError as e:
            return {"error": f"Sapling API Error: {str(e)}"}

    def _split_text(self, text: str) -> List[str]:
        """Split *text* into overlapping chunks of about 1000 characters."""
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", ". ", " ", ""]
        )
        return splitter.split_text(text)

    def _get_relevant_chunks(
        self, chunks: List[str], query: str, top_k: int = 3
    ) -> str:
        """Return the *top_k* chunks sharing the most words with *query*.

        Simple keyword-based relevance scoring: a chunk's score is the number
        of distinct query words it contains (case-insensitive, punctuation
        ignored).  Ties keep document order.

        Returns:
            The selected chunks joined by blank lines, best first.
        """
        query_words = set(re.findall(r"\w+", query.lower()))

        def overlap(chunk: str) -> int:
            return len(query_words.intersection(re.findall(r"\w+", chunk.lower())))

        # ``sorted`` is stable even with reverse=True, so equally scored
        # chunks stay in the order they appear in the document.
        ranked = sorted(chunks, key=overlap, reverse=True)
        return "\n\n".join(ranked[:top_k])

    def _search_arxiv(self, topic: str, max_results: int = 5) -> List[Dict]:
        """Search arXiv for papers matching every word of *topic*.

        Returns:
            Parsed paper dicts (see ``_parse_arxiv_response``); an empty list
            if *topic* has no words or the request fails.
        """
        terms = topic.split()
        if not terms:
            return []
        # Percent-encode each term so characters such as '&' or '#' in the
        # topic cannot corrupt the query string; '+' separators stay literal.
        query = "+AND+".join(quote(term, safe="") for term in terms)
        url = (
            f"{ARXIV_API_URL}?search_query=all:{query}"
            f"&start=0&max_results={max_results}"
        )
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            logger.warning("arXiv search failed: %s", e)
            return []
        return self._parse_arxiv_response(response.text)

    def _parse_arxiv_response(self, response_text: str) -> List[Dict]:
        """Parse an arXiv Atom feed into a list of paper dicts.

        Each dict has the keys ``id``, ``title``, ``summary``, ``authors``
        (list of names) and ``published`` (first 10 characters of the
        timestamp).  Fields missing from an entry become empty strings
        instead of discarding the whole result set.

        Returns:
            The parsed papers, or an empty list if the XML is not well-formed.
        """
        try:
            root = ET.fromstring(response_text)
        except ET.ParseError as e:
            logger.warning("arXiv response parsing failed: %s", e)
            return []

        papers = []
        for entry in root.findall(f"{ATOM_NS}entry"):
            author_names = (
                author.findtext(f"{ATOM_NS}name", default="").strip()
                for author in entry.findall(f"{ATOM_NS}author")
            )
            papers.append(
                {
                    "id": entry.findtext(f"{ATOM_NS}id", default=""),
                    "title": entry.findtext(f"{ATOM_NS}title", default="").strip(),
                    "summary": entry.findtext(f"{ATOM_NS}summary", default="").strip(),
                    "authors": [name for name in author_names if name],
                    "published": entry.findtext(f"{ATOM_NS}published", default="")[:10],
                }
            )
        return papers


def _show_chat_result(result: Dict) -> None:
    """Render a chat-shaped result dict, or its error message."""
    if "error" in result:
        st.error(result["error"])
    else:
        st.write(result["choices"][0]["message"]["content"])


def _render_chat_tab(assistant: ResearchAssistant) -> None:
    """Render the "Chat with PDF" tab."""
    st.header("Chat with PDF")

    # File uploader with clear button
    col1, col2 = st.columns([3, 1])
    with col1:
        uploaded_file = st.file_uploader("Upload PDF", type="pdf", key="pdf_chat")
    with col2:
        if st.button("Clear PDF"):
            st.session_state.pop("pdf_text", None)
            st.session_state.pop("pdf_signature", None)
            st.rerun()

    if not uploaded_file:
        return

    # Re-extract whenever a different file is uploaded; caching on the mere
    # presence of "pdf_text" would answer questions from the previous PDF.
    signature = (uploaded_file.name, uploaded_file.size)
    if (
        "pdf_text" not in st.session_state
        or st.session_state.get("pdf_signature") != signature
    ):
        with st.spinner("Processing PDF..."):
            st.session_state.pdf_text = extract_text_from_pdf(uploaded_file)
            st.session_state.pdf_signature = signature
        st.success("PDF processed successfully!")

    query = st.text_input("Ask a question about the PDF")
    if query:
        with st.spinner("Analyzing..."):
            response = assistant.chat_with_pdf(st.session_state.pdf_text, query)
        _show_chat_result(response)


def _render_literature_review_tab(assistant: ResearchAssistant) -> None:
    """Render the "Literature Review" tab."""
    st.header("Literature Review")
    topic = st.text_input("Enter research topic")
    if st.button("Generate Review") and topic:
        with st.spinner("Generating literature review..."):
            review = assistant.generate_literature_review(topic)
        _show_chat_result(review)


def _render_ai_writer_tab(assistant: ResearchAssistant) -> None:
    """Render the "AI Writer" tab."""
    st.header("AI Writer")
    outline = st.text_area("Enter paper outline")
    references = st.text_area("Enter references (one per line)")
    if st.button("Generate Paper") and outline:
        # Blank lines are not references; drop them before prompting.
        reference_list = [
            line.strip() for line in references.splitlines() if line.strip()
        ]
        with st.spinner("Writing paper..."):
            paper = assistant.ai_writer(outline, reference_list)
        _show_chat_result(paper)


def _render_extract_tab() -> None:
    """Render the "Extract Data" tab: per-category analysis of uploaded PDFs."""
    st.header("Extract Data")
    uploaded_files = st.file_uploader(
        "Upload multiple PDF files", type="pdf", accept_multiple_files=True
    )
    if not uploaded_files:
        return

    if st.button("Process Papers"):
        # Initialize progress bar
        progress_bar = st.progress(0)
        status_text = st.empty()

        categories = list(CATEGORY_PROMPTS)
        total_steps = len(uploaded_files) * len(categories)
        completed_steps = 0
        results = []

        # Process each file
        for file in uploaded_files:
            status_text.text(f"Processing {file.name}...")
            text = extract_text_from_pdf(file)
            paper_results = {"Filename": file.name}

            # Analyze each category
            for category in categories:
                status_text.text(f"Processing {file.name} - {category}")
                paper_results[category] = analyze_paper(text, category)

                completed_steps += 1
                progress_bar.progress(completed_steps / total_steps)

                # Add small delay to avoid API rate limits
                time.sleep(1)

            results.append(paper_results)

        # Keep the results across reruns: clicking the download button
        # triggers a rerun in which "Process Papers" is no longer pressed.
        st.session_state["extract_results"] = results
        status_text.text("Processing complete!")
        progress_bar.progress(1.0)

    results = st.session_state.get("extract_results")
    if results:
        df = pd.DataFrame(results)
        st.download_button(
            label="Download Results as CSV",
            data=df.to_csv(index=False),
            file_name="research_papers_analysis.csv",
            mime="text/csv",
        )
        # Display results in the app
        st.subheader("Analysis Results")
        st.dataframe(df)


def _render_paraphraser_tab(assistant: ResearchAssistant) -> None:
    """Render the "Paraphraser" tab."""
    st.header("Paraphraser")
    text = st.text_area("Enter text to paraphrase")
    if st.button("Paraphrase") and text:
        with st.spinner("Paraphrasing..."):
            result = assistant.paraphrase(text)
        _show_chat_result(result)


def _render_citation_tab(assistant: ResearchAssistant) -> None:
    """Render the "Citation Generator" tab."""
    st.header("Citation Generator")
    col1, col2 = st.columns(2)
    with col1:
        title = st.text_input("Paper Title")
        authors = st.text_input("Authors (comma-separated)")
    with col2:
        year = st.text_input("Year")
        style = st.selectbox("Citation Style", ["APA", "MLA", "Chicago"])

    if st.button("Generate Citation") and title:
        paper_info = {
            "title": title,
            # Skip empty fragments from blank input or trailing commas.
            "authors": [a.strip() for a in authors.split(",") if a.strip()],
            "year": year,
        }
        with st.spinner("Generating citation..."):
            citation = assistant.generate_citation(paper_info, style)
        if "error" in citation:
            st.error(citation["error"])
        else:
            st.code(citation["citation"], language="text")


def _render_ai_detector_tab(assistant: ResearchAssistant) -> None:
    """Render the "AI Detector" tab."""
    st.header("AI Detector")
    text = st.text_area("Enter text to analyze")
    if st.button("Detect AI Content") and text:
        with st.spinner("Analyzing..."):
            result = assistant.detect_ai_content(text)
        _show_chat_result(result)


def main():
    """Build the Research Copilot page and dispatch each tab to its renderer.

    Shows a warning and stops if the Perplexity API key is not configured.
    """
    # st.set_page_config(page_title="Research Assistant", layout="wide")
    st.title("Research Copilot")

    if not PERPLEXITY_API_KEY:
        st.warning("Perplexity API key not found in environment variables.")
        return

    assistant = ResearchAssistant(PERPLEXITY_API_KEY)

    tabs = st.tabs(
        [
            "Chat with PDF",
            "Literature Review",
            "AI Writer",
            "Extract Data",
            "Paraphraser",
            "Citation Generator",
            "AI Detector",
        ]
    )

    with tabs[0]:  # Chat with PDF
        _render_chat_tab(assistant)
    with tabs[1]:  # Literature Review
        _render_literature_review_tab(assistant)
    with tabs[2]:  # AI Writer
        _render_ai_writer_tab(assistant)
    with tabs[3]:  # Extract Data
        _render_extract_tab()
    with tabs[4]:  # Paraphraser
        _render_paraphraser_tab(assistant)
    with tabs[5]:  # Citation Generator
        _render_citation_tab(assistant)
    with tabs[6]:  # AI Detector
        _render_ai_detector_tab(assistant)


if __name__ == "__main__":
    main()