Spaces:

SPJIMR-Internship
/

SPJIMR_FlipClassroom_RCopilot_ResearchInternship

Runtime error

SPJIMR_FlipClassroom_RCopilot_ResearchInternship

File size: 15,313 Bytes

# import streamlit as st
# import pandas as pd
# import networkx as nx
# from bokeh.models import HoverTool
# from bokeh.plotting import figure, from_networkx
# import requests
# import json
# import google.generativeai as genai

# PERPLEXITY_API_KEY = "<redacted>"  # hard-coded secret removed; never commit API keys (the live code below reads it from the environment)
# PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"


# def extract_edges(keywords):
#     keywords = [kw.strip() for kw in keywords.split(",")]
#     edges = [
#         (keywords[i], keywords[j])
#         for i in range(len(keywords))
#         for j in range(i + 1, len(keywords))
#     ]
#     return edges


# def create_knowledge_graph(data):
#     G = nx.Graph()

#     for _, row in data.iterrows():
#         words = []
#         for col in data.columns:
#             if pd.notnull(row[col]):
#                 # Convert to string and handle numeric values
#                 cell_value = str(row[col]).strip()
#                 if cell_value:
#                     words.extend(cell_value.split())

#         if words:
#             edges = extract_edges(",".join(words))
#             G.add_edges_from(edges)

#             for word in words:
#                 word = word.strip()
#                 if word not in G:
#                     G.add_node(word, title=word, value=len(word))

#     return G


# def render_graph_bokeh(G):
#     plot = figure(
#         title="Interactive Knowledge Graph",
#         x_range=(-1.5, 1.5),
#         y_range=(-1.5, 1.5),
#         tools="pan,wheel_zoom,box_zoom,reset,tap",
#         active_scroll="wheel_zoom",
#     )
#     plot.add_tools(HoverTool(tooltips="@index"))

#     graph_renderer = from_networkx(G, nx.spring_layout, scale=1, center=(0, 0))

#     graph_renderer.node_renderer.glyph.size = 10
#     graph_renderer.node_renderer.glyph.fill_color = "blue"
#     graph_renderer.node_renderer.glyph.line_color = "black"

#     graph_renderer.edge_renderer.glyph.line_width = 1
#     graph_renderer.edge_renderer.glyph.line_color = "gray"

#     plot.renderers.append(graph_renderer)

#     return plot


# import re


# def search_papers(topic: str, num_papers: int) -> list:
#     headers = {
#         "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
#         "Content-Type": "application/json",
#     }

#     prompt = f"""Find {num_papers} recent research papers about {topic}.
#     Return ONLY a valid JSON array with the following structure for each paper:
#     [
#         {{
#             "Title": "paper title",
#             "Abstract": "abstract text",
#             "Keywords": "key terms"
#         }}
#     ]"""

#     payload = {
#         "model": "llama-3.1-sonar-small-128k-chat",
#         "messages": [
#             {
#                 "role": "system",
#                 "content": "You are a research paper analyzer that returns valid JSON arrays.",
#             },
#             {"role": "user", "content": prompt},
#         ],
#         "temperature": 0.1,
#     }

#     try:
#         response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
#         response.raise_for_status()
#         content = response.json()["choices"][0]["message"]["content"]

#         # Clean response to ensure valid JSON
#         content = content.strip()
#         if not content.startswith("["):
#             content = content[content.find("[") :]
#         if not content.endswith("]"):
#             content = content[: content.rfind("]") + 1]

#         # Remove any trailing commas before closing brackets
#         content = re.sub(r",\s*]", "]", content)
#         content = re.sub(r",\s*}", "}", content)

#         papers = json.loads(content)
#         if not isinstance(papers, list):
#             raise ValueError("Response is not a JSON array")
#         return papers
#     except requests.exceptions.RequestException as e:
#         st.error(f"API Request Error: {str(e)}")
#         return []
#     except json.JSONDecodeError as e:
#         st.error(f"Invalid JSON response: {str(e)}")
#         st.error(f"Response content: {response.text}")
#         return []
#     except ValueError as e:
#         st.error(f"Error: {str(e)}")
#         return []


# import os

# GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# GEMINI_API_URL = "https://api.openai.com/v1/engines/davinci-codex/completions"


# def call_gemini_api(prompt: str) -> str:
#     headers = {
#         "Authorization": f"Bearer {GEMINI_API_KEY}",
#         "Content-Type": "application/json",
#     }

#     payload = {
#         "prompt": prompt,
#         "max_tokens": 150,
#         "temperature": 0.7,
#     }

#     try:
#         model = genai.GenerativeModel("gemini-pro")
#         response = model.generate_content(prompt)
#         return response.text
#     except Exception as e:
#         st.error(f"Gemini API Error: {str(e)}")
#         return ""


# def generate_gaps_paragraph(gaps):
#     prompt = f"Generate a brief paragraph about the gaps in the research based on the following gaps: {', '.join(gaps)}"
#     return call_gemini_api(prompt)


# def generate_insights(G, topic):
#     papers = search_papers(topic, 5)
#     if papers:
#         st.write("### Research Insights from Perplexity API")
#         for paper in papers:
#             st.write(f"**Title:** {paper['Title']}")
#             st.write(f"**Abstract:** {paper['Abstract']}")
#             st.write(f"**Keywords:** {paper['Keywords']}")
#             st.write("---")

#     nodes = list(G.nodes(data=True))
#     insights = {}
#     insights["Strong Points"] = [
#         n for n, d in nodes if G.degree(n) > len(G.nodes) * 0.1
#     ]
#     insights["Weak Points"] = [n for n, d in nodes if G.degree(n) < len(G.nodes) * 0.05]
#     insights["Gaps"] = [n for n, d in nodes if len(list(nx.neighbors(G, n))) == 0]

#     st.write("### Graph-Based Insights")
#     st.write("**Strong Points:**", insights["Strong Points"])
#     st.write("**Weak Points:**", insights["Weak Points"])
#     st.write("**Gaps:**", insights["Gaps"])

#     if insights["Gaps"]:
#         with st.spinner("Generating insights about gaps..."):
#             gaps_paragraph = generate_gaps_paragraph(insights["Gaps"])
#             if gaps_paragraph:
#                 st.write("### Gaps in Research")
#                 st.write(gaps_paragraph)


# def main():
#     st.title("Advanced Interactive Knowledge Graph")
#     st.write(
#         "Upload a CSV file to generate a fully interactive and insightful knowledge graph."
#     )

#     uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

#     if uploaded_file is not None:
#         try:
#             data = pd.read_csv(uploaded_file)
#             st.write("Preview of the uploaded data:")
#             st.dataframe(data.head())

#             G = create_knowledge_graph(data)

#             st.write("Generated Knowledge Graph:")
#             plot = render_graph_bokeh(G)
#             st.bokeh_chart(plot, use_container_width=True)

#             topic = st.text_input(
#                 "Enter a topic for additional insights:", "knowledge graphs"
#             )
#             if topic:
#                 generate_insights(G, topic)

#         except Exception as e:
#             st.error(f"An error occurred while processing the file: {e}")
#     else:
#         st.info("Please upload a CSV file to get started.")


# if __name__ == "__main__":
#     main()
import streamlit as st
import pandas as pd
import networkx as nx
from bokeh.models import HoverTool
from bokeh.plotting import figure, from_networkx
import requests
import json
import google.generativeai as genai
from dotenv import load_dotenv
import os

# Pull variables from a local .env file (if one exists) into the process
# environment so the os.getenv() lookups in this module can see them.
load_dotenv()

# Perplexity credentials and endpoint used by search_papers(); the key is None
# when PERPLEXITY_API_KEY is not set in the environment.
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"


def extract_edges(keywords):
    """Return every unordered pair of entries from a comma-separated string.

    Args:
        keywords: A string whose entries are separated by commas. Surrounding
            whitespace is stripped from each entry; empty entries are kept.

    Returns:
        A list of 2-tuples ``(earlier, later)`` covering each pair of entries
        once, ordered by the position of the first entry and then the second.
    """
    terms = [part.strip() for part in keywords.split(",")]

    pairs = []
    for position, left in enumerate(terms):
        # Only pair with entries further along so each pair is emitted once.
        for right in terms[position + 1:]:
            pairs.append((left, right))
    return pairs


def create_knowledge_graph(data):
    """Build an undirected word co-occurrence graph from a DataFrame.

    Every non-null cell of a row is converted to a string and split on
    whitespace. Each distinct token of the row becomes a node, and every pair
    of distinct tokens from the same row is connected by an edge.

    Args:
        data: A pandas DataFrame; each row is treated as one co-occurrence
            context.

    Returns:
        A ``networkx.Graph`` whose nodes are the tokens. Every node carries a
        ``title`` attribute (the token itself) and a ``value`` attribute (the
        token's length in characters).
    """
    G = nx.Graph()

    for _, row in data.iterrows():
        tokens = []
        # Iterate over the row's values directly so that duplicate column
        # names cannot turn a cell lookup into a multi-element Series.
        for cell in row:
            if pd.notnull(cell):
                # str() handles numeric cells; split() drops all whitespace,
                # so blank cells contribute nothing.
                tokens.extend(str(cell).split())

        # De-duplicate while keeping first-seen order, so a word repeated
        # within a row does not create a self-loop.
        unique_tokens = list(dict.fromkeys(tokens))

        # Add nodes first so every node gets the same attributes, including
        # the ones that will also appear in edges.
        for token in unique_tokens:
            G.add_node(token, title=token, value=len(token))

        # Pair the tokens directly instead of round-tripping them through a
        # comma-joined string, which would split tokens that contain commas.
        G.add_edges_from(
            (first, second)
            for idx, first in enumerate(unique_tokens)
            for second in unique_tokens[idx + 1:]
        )

    return G


def render_graph_bokeh(G):
    """Create a Bokeh figure that draws the given graph.

    Args:
        G: The networkx graph to draw; node positions are computed with
            ``nx.spring_layout``.

    Returns:
        A Bokeh figure with pan/zoom/reset/tap tools, a hover tool showing
        ``@index``, and the graph renderer attached.
    """
    canvas = figure(
        title="Interactive Knowledge Graph",
        x_range=(-1.5, 1.5),
        y_range=(-1.5, 1.5),
        tools="pan,wheel_zoom,box_zoom,reset,tap",
        active_scroll="wheel_zoom",
    )
    hover = HoverTool(tooltips="@index")
    canvas.add_tools(hover)

    network = from_networkx(G, nx.spring_layout, scale=1, center=(0, 0))

    # Node appearance: small blue circles with a black outline.
    node_glyph = network.node_renderer.glyph
    node_glyph.size = 10
    node_glyph.fill_color = "blue"
    node_glyph.line_color = "black"

    # Edge appearance: thin gray lines.
    edge_glyph = network.edge_renderer.glyph
    edge_glyph.line_width = 1
    edge_glyph.line_color = "gray"

    canvas.renderers.append(network)
    return canvas


import re


def _extract_json_array(content: str) -> str:
    """Trim model output down to its outermost ``[...]`` span and drop trailing commas."""
    content = content.strip()
    start = content.find("[")
    end = content.rfind("]")
    # Only slice when both brackets exist; otherwise leave the text alone and
    # let the JSON parser reject it with a meaningful error.
    if start != -1 and end > start:
        content = content[start : end + 1]

    # Remove any trailing commas before closing brackets
    content = re.sub(r",\s*]", "]", content)
    content = re.sub(r",\s*}", "}", content)
    return content


def search_papers(topic: str, num_papers: int) -> list:
    """Ask the Perplexity chat-completions API for papers about a topic.

    The model is prompted to answer with a JSON array of objects that have
    ``Title``, ``Abstract`` and ``Keywords`` keys. Because the answer is
    free-form model output, it is cleaned up before being parsed.

    Args:
        topic: Subject to search for; interpolated into the prompt.
        num_papers: Number of papers requested in the prompt.

    Returns:
        The parsed JSON array as a list. On any failure (missing key, HTTP
        error, timeout, unexpected payload, invalid JSON) an error is shown
        with ``st.error`` and an empty list is returned.
    """
    if not PERPLEXITY_API_KEY:
        # Fail fast with a clear message instead of sending "Bearer None".
        st.error("API Request Error: PERPLEXITY_API_KEY is not set")
        return []

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    prompt = f"""Find {num_papers} recent research papers about {topic}.
    Return ONLY a valid JSON array with the following structure for each paper:
    [
        {{
            "Title": "paper title",
            "Abstract": "abstract text",
            "Keywords": "key terms"
        }}
    ]"""

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [
            {
                "role": "system",
                "content": "You are a research paper analyzer that returns valid JSON arrays.",
            },
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.1,
    }

    try:
        # A timeout keeps the Streamlit script from hanging forever if the
        # API never answers.
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=60
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]

        # Clean response to ensure valid JSON
        papers = json.loads(_extract_json_array(content))
        if not isinstance(papers, list):
            raise ValueError("Response is not a JSON array")
        return papers
    except requests.exceptions.RequestException as e:
        st.error(f"API Request Error: {str(e)}")
        return []
    except json.JSONDecodeError as e:
        st.error(f"Invalid JSON response: {str(e)}")
        st.error(f"Response content: {response.text}")
        return []
    except (KeyError, IndexError, TypeError) as e:
        # The payload did not have the choices[0].message.content shape.
        st.error(f"Unexpected API response format: {e!r}")
        return []
    except ValueError as e:
        st.error(f"Error: {str(e)}")
        return []




# Gemini API key read from the environment; None when GEMINI_API_KEY is unset.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# NOTE(review): this URL points at an OpenAI endpoint, and nothing visible in
# this file sends a request to it -- call_gemini_api() goes through the genai
# client instead. Confirm whether this constant can be removed.
GEMINI_API_URL = "https://api.openai.com/v1/engines/davinci-codex/completions"


def call_gemini_api(prompt: str) -> str:
    """Send a prompt to the Gemini model and return the generated text.

    Args:
        prompt: The text prompt passed to the model.

    Returns:
        The text of the model's response, or an empty string if the call
        fails (the error is shown with ``st.error``).
    """
    try:
        # Hand the key to the genai client; without this the key read into
        # GEMINI_API_KEY is never used. When the key is unset, the client's
        # own default configuration is left in place.
        if GEMINI_API_KEY:
            genai.configure(api_key=GEMINI_API_KEY)
        model = genai.GenerativeModel("gemini-pro")
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Boundary with a third-party service: surface any failure in the UI
        # and let the caller continue with an empty result.
        st.error(f"Gemini API Error: {str(e)}")
        return ""


def generate_gaps_paragraph(gaps):
    """Ask Gemini for a gap analysis and research ideas for the given gaps.

    Args:
        gaps: Iterable of strings naming the gaps; they are joined with
            ``", "`` and appended to the end of the prompt.

    Returns:
        Whatever ``call_gemini_api`` returns for the assembled prompt.
    """
    joined_gaps = ", ".join(gaps)
    request_text = f"Generate a brief paragraph about the gaps in the research based on the following gaps  provide evidence-based(how did you deduce this) recommendations for new research paper ideas based on these gaps,   Justify your recommendations with evidence from the data it analyzes. Give atleast 10 new  research paper ideas based on the gaps and 500 words gap analysis,Give a different table for new research ideas with evidence that why did you reccommend it, In the new Table Containing Research Paper Ideas there should be 4 columns - New Research Paper Idea/Title,Evidence , Methodology(How will we approach the ideo new research paper), Evidence Of How Methodology was obtained: {joined_gaps}"
    return call_gemini_api(request_text)


def generate_insights(G, topic):
    """Render paper summaries and graph-derived insights in the Streamlit page.

    First the papers returned by ``search_papers`` for ``topic`` are listed.
    Then the graph's nodes are classified by degree relative to the number of
    nodes:

    * Strong Points: degree greater than 10% of the node count.
    * Weak Points: degree less than 5% of the node count.
    * Gaps: nodes with no neighbours at all.

    If there are gaps, a generated gap analysis is shown together with a
    download button for the text.

    Args:
        G: The networkx graph to analyse.
        topic: Topic string forwarded to ``search_papers``.
    """
    papers = search_papers(topic, 5)
    if papers:
        st.write("### Research Insights from Perplexity API")
        for paper in papers:
            # The entries come from model output, so they may be malformed;
            # skip non-objects and tolerate missing keys instead of raising
            # and aborting the rest of the insights.
            if not isinstance(paper, dict):
                continue
            st.write(f"**Title:** {paper.get('Title', 'N/A')}")
            st.write(f"**Abstract:** {paper.get('Abstract', 'N/A')}")
            st.write(f"**Keywords:** {paper.get('Keywords', 'N/A')}")
            st.write("---")

    # Compute the node count and all degrees once rather than per node.
    node_count = G.number_of_nodes()
    degrees = dict(G.degree())

    insights = {
        "Strong Points": [
            node for node, degree in degrees.items() if degree > node_count * 0.1
        ],
        "Weak Points": [
            node for node, degree in degrees.items() if degree < node_count * 0.05
        ],
        # A node with degree zero has no neighbours.
        "Gaps": [node for node, degree in degrees.items() if degree == 0],
    }

    st.write("### Graph-Based Insights")
    st.write("**Strong Points:**", insights["Strong Points"])
    st.write("**Weak Points:**", insights["Weak Points"])
    st.write("**Gaps:**", insights["Gaps"])

    if insights["Gaps"]:
        with st.spinner("Generating insights about gaps..."):
            gaps_paragraph = generate_gaps_paragraph(insights["Gaps"])
            if gaps_paragraph:
                st.write("### Gaps in Research")
                st.write(gaps_paragraph)
                st.download_button(
                    label="Download Gaps Analysis as Text",
                    data=gaps_paragraph,
                    file_name="gaps_analysis.txt",
                    mime="text/plain",
                )

def main():
    """Streamlit entry point: upload a CSV, draw its graph, show insights.

    Shows an info message until a CSV file is uploaded. Once a file is
    available it is read with pandas, previewed, turned into a knowledge
    graph, rendered with Bokeh, and (when a topic is entered) analysed for
    insights. Any exception raised during that processing is reported with
    ``st.error`` instead of propagating.
    """
    st.title("Advanced Interactive Knowledge Graph")
    st.write(
        "Upload a CSV file to generate a fully interactive and insightful knowledge graph."
    )

    csv_file = st.file_uploader("Choose a CSV file", type="csv")

    # Nothing to process yet: prompt the user and stop.
    if csv_file is None:
        st.info("Please upload a CSV file to get started.")
        return

    try:
        frame = pd.read_csv(csv_file)
        st.write("Preview of the uploaded data:")
        st.dataframe(frame.head())

        graph = create_knowledge_graph(frame)

        st.write("Generated Knowledge Graph:")
        chart = render_graph_bokeh(graph)
        st.bokeh_chart(chart, use_container_width=True)

        topic = st.text_input(
            "Enter a topic for additional insights:", "knowledge graphs"
        )
        if topic:
            generate_insights(graph, topic)
    except Exception as exc:
        # Top-level UI boundary: show the failure rather than crash the app.
        st.error(f"An error occurred while processing the file: {exc}")


# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()