# import streamlit as st # from openai import OpenAI # import os # from dotenv import load_dotenv # from llama_index.core import ( # VectorStoreIndex, # SimpleDirectoryReader, # Document, # GPTVectorStoreIndex, # ) # from bson import ObjectId # import requests # import openai # import numpy as np # from pymongo import MongoClient # from bson import ObjectId # from datetime import datetime # from llama_index.embeddings.openai import OpenAIEmbedding # from typing import List, Dict # # Initialize Perplexity API and OpenAI API # load_dotenv() # perplexity_api_key = os.getenv("PERPLEXITY_KEY") # openai.api_key = os.getenv("OPENAI_KEY") # # MongoDB setup # MONGO_URI = os.getenv("MONGO_URI") # client = MongoClient(MONGO_URI) # db = client["novascholar_db"] # research_papers_collection = db["research_papers"] # def fetch_perplexity_data(api_key, topic): # """ # Fetch research papers data from Perplexity API with proper formatting # """ # headers = { # "accept": "application/json", # "content-type": "application/json", # "authorization": f"Bearer {api_key}", # } # # Structured prompt to get properly formatted response # messages = [ # { # "role": "system", # "content": """You are a research paper retrieval expert. 
For the given topic, return exactly 10 research papers in the following format: # Title: Paper Title # Authors: Author 1, Author 2 # Year: YYYY # Content: Detailed paper content with abstract and key findings # URL: DOI or paper URL # """, # }, # {"role": "user", "content": f"Find 10 research papers about: {topic}"}, # ] # try: # client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai") # response = client.chat.completions.create( # model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model # messages=messages, # ) # # Extract and validate response # content = response.choices[0].message.content # st.write("Fetched Data:", content) # Debugging line to check the fetched data # return content # except Exception as e: # st.error(f"Failed to fetch data from Perplexity API: {str(e)}") # return "" # def split_and_vectorize_papers(content: str) -> List[Dict]: # """Split and vectorize papers using OpenAI embeddings""" # papers = content.split("\n\n") # # Initialize OpenAI client # # client = OpenAI() # Uses api_key from environment variable # vectors = [] # for paper in papers: # try: # # Get embedding using OpenAI's API directly # response = openai.embeddings.create( # model="text-embedding-ada-002", input=paper, encoding_format="float" # ) # # Extract embedding from response # embedding = response.data[0].embedding # vectors.append( # {"content": paper, "vector": embedding, "timestamp": datetime.utcnow()} # ) # except Exception as e: # st.error(f"Error vectorizing paper: {str(e)}") # continue # return vectors # def store_papers_in_mongodb(papers): # """Store papers with vectors in MongoDB""" # try: # for paper in papers: # # Prepare MongoDB document # mongo_doc = { # "content": paper["content"], # "vector": paper["vector"], # "created_at": datetime.utcnow(), # } # # Insert into MongoDB # db.papers.update_one( # {"content": paper["content"]}, {"$set": mongo_doc}, upsert=True # ) # st.success(f"Stored {len(papers)} papers in database") # return 
True # except Exception as e: # st.error(f"Error storing papers: {str(e)}") # def get_research_papers(query): # """ # Get and store research papers with improved error handling # """ # # Fetch papers from Perplexity # content = fetch_perplexity_data(perplexity_api_key, query) # if not content: # return [] # # Split and vectorize papers # papers = split_and_vectorize_papers(content) # # Store papers in MongoDB # if store_papers_in_mongodb(papers): # return papers # else: # st.warning("Failed to store papers in database, but returning fetched results") # return papers # def analyze_research_gaps(papers): # """ # Analyze research gaps with improved prompt and error handling # """ # if not papers: # return "No papers provided for analysis" # # Prepare paper summaries for analysis # paper_summaries = "\n\n".join( # [ # f"Key Findings: {paper['content'][:500]}..." # # f"Title: {paper['title']}\nYear: {paper['year']}\nKey Findings: {paper['content'][:500]}..." # for paper in papers # ] # ) # headers = { # "Authorization": f"Bearer {perplexity_api_key}", # "Content-Type": "application/json", # } # data = { # "messages": [ # { # "role": "system", # "content": "You are a research analysis expert. Identify specific research gaps and future research directions based on the provided papers. 
Format your response with clear sections: Current State, Identified Gaps, and Future Directions.", # }, # { # "role": "user", # "content": f"Analyze these papers and identify research gaps:\n\n{paper_summaries}", # }, # ] # } # try: # client = OpenAI( # api_key=perplexity_api_key, base_url="https://api.perplexity.ai" # ) # response = client.chat.completions.create( # model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model # messages=data["messages"], # ) # return response.choices[0].message.content # except Exception as e: # st.error(f"Failed to analyze research gaps: {str(e)}") # return "Error analyzing research gaps" # def create_research_paper(gaps, topic, papers): # """ # Create a research paper that addresses the identified gaps using Perplexity API # """ # full_texts = "\n\n".join([paper["content"] for paper in papers]) # headers = { # "Authorization": f"Bearer {perplexity_api_key}", # "Content-Type": "application/json", # } # data = { # "messages": [ # { # "role": "system", # "content": "You are a research paper generation expert. Create a comprehensive research paper that addresses the identified gaps based on the provided papers. 
Format your response with clear sections: Introduction, Literature Review, Methodology, Results, Discussion, Conclusion, and References.", # }, # { # "role": "user", # "content": f"Create a research paper on the topic '{topic}' that addresses the following research gaps:\n\n{gaps}\n\nBased on the following papers:\n\n{full_texts}", # }, # ] # } # try: # client = OpenAI( # api_key=perplexity_api_key, base_url="https://api.perplexity.ai" # ) # response = client.chat.completions.create( # model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model # messages=data["messages"], # ) # return response.choices[0].message.content # except Exception as e: # st.error(f"Failed to create research paper: {str(e)}") # return "Error creating research paper" # def cosine_similarity(vec1, vec2): # """Calculate the cosine similarity between two vectors""" # vec1 = np.array(vec1) # vec2 = np.array(vec2) # return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2)) # def calculate_cosine_similarity(vec1: List[float], vec2: List[float]) -> float: # """Calculate cosine similarity between two vectors""" # return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2)) # def display_research_assistant_dashboard(): # """Display research assistant dashboard""" # # Initialize session state for recommendations # if "recommendations" not in st.session_state: # st.session_state.recommendations = None # if "vectors" not in st.session_state: # st.session_state.vectors = None # if "generated_paper" not in st.session_state: # st.session_state.generated_paper = None # # Sidebar # with st.sidebar: # st.title(f"Welcome, {st.session_state.username}") # if st.button("Logout", use_container_width=True): # for key in st.session_state.keys(): # del st.session_state[key] # st.rerun() # # Main content # st.title("Research Paper Recommendations") # search_query = st.text_input("Enter research topic:") # col1, col2 = st.columns(2) # with col1: # if st.button("Get Research 
Papers"): # if search_query: # with st.spinner("Fetching recommendations..."): # st.session_state.recommendations = get_research_papers(search_query) # st.session_state.vectors = [ # paper["vector"] for paper in st.session_state.recommendations # ] # st.markdown( # "\n\n".join( # [ # f"**{i+1}.**\n{paper['content']}" # # f"**{i+1}. {paper['title']}**\n{paper['content']}" # for i, paper in enumerate( # st.session_state.recommendations # ) # ] # ) # ) # else: # st.warning("Please enter a search query") # with col2: # if st.button("Analyze Research Gaps"): # if st.session_state.recommendations: # with st.spinner("Analyzing research gaps..."): # gaps = analyze_research_gaps(st.session_state.recommendations) # st.session_state.generated_paper = create_research_paper( # gaps, search_query, st.session_state.recommendations # ) # st.markdown("### Potential Research Gaps") # st.markdown(gaps) # else: # st.warning("Please get research papers first") # if st.button("Save and Vectorize"): # if st.session_state.generated_paper: # try: # # Initialize OpenAI client # # Get embedding for generated paper # response = openai.embeddings.create( # model="text-embedding-ada-002", # input=st.session_state.generated_paper, # encoding_format="float", # ) # generated_vector = response.data[0].embedding # # Calculate similarities with stored vectors # similarities = [ # calculate_cosine_similarity(generated_vector, paper_vector) # for paper_vector in st.session_state.vectors # ] # # Display results # st.markdown("### Generated Research Paper") # st.markdown(st.session_state.generated_paper) # st.markdown("### Cosine Similarities with Original Papers") # for i, similarity in enumerate(similarities): # st.metric( # f"Paper {i+1}", # value=f"{similarity:.3f}", # help="Cosine similarity (1.0 = identical, 0.0 = completely different)", # ) # except Exception as e: # st.error(f"Error during vectorization: {str(e)}") # else: # st.warning("Please analyze research gaps first") # # Run the dashboard # 
# if __name__ == "__main__":
#     display_research_assistant_dashboard()

import research_combine2

# if __name__ == "__main__":
#     display_research_assistant_dashboard()


def display_research_assistant_dashboard() -> None:
    """Display the research assistant dashboard.

    Thin compatibility wrapper: the call is forwarded unchanged to
    ``research_combine2.display_research_assistant_dashboard``, so code that
    imports the dashboard entry point from this module keeps working.

    The delegate is looked up on the module at call time (not bound at
    import time), and its return value is intentionally not propagated;
    this function always returns ``None``.
    """
    research_combine2.display_research_assistant_dashboard()