Initial Commit
Browse files- .gitignore +22 -0
- README.md +5 -7
- Research Paper Attributes.txt +98 -0
- analytics.py +97 -0
- app.py +1424 -0
- chatbot.py +67 -0
- create_course.py +272 -0
- create_course2.py +331 -0
- db.py +696 -0
- entire_download.py +90 -0
- extract.py +140 -0
- file_upload_vectorize.py +179 -0
- gen_mcqs.py +206 -0
- goals2.py +658 -0
- infranew.py +231 -0
- keywords_database_download.py +104 -0
- live_polls.py +115 -0
- loldude.py +135 -0
- modify_schema.py +222 -0
- new_keywords.py +127 -0
- new_research_paper.py +103 -0
- poll_db_operations.py +70 -0
- poll_db_setup.py +35 -0
- pre_class_analytics2.py +759 -0
- pre_class_analytics4.py +592 -0
- requirements.txt +37 -0
- research22.py +517 -0
- research3.py +110 -0
- research_assistant_dashboard.py +349 -0
- research_combine.py +188 -0
- research_combine2.py +269 -0
- sciclone.py +466 -0
- session_page.py +0 -0
- ui.py +111 -0
- utils/helpers.py +83 -0
- utils/sample_data.py +226 -0
.gitignore
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Ignore .env file
|
2 |
+
.env
|
3 |
+
__pycache__/
|
4 |
+
newenv
|
5 |
+
backupgoal.py
|
6 |
+
backupgoal2.py
|
7 |
+
backupresearch.py
|
8 |
+
goals.py
|
9 |
+
goals3.py
|
10 |
+
research_assistant_dashboard2.py
|
11 |
+
tempCodeRunnerFile.py
|
12 |
+
all_chat_histories.json
|
13 |
+
all_chat_histories2.json
|
14 |
+
analytics.ipynb
|
15 |
+
chat_history.csv
|
16 |
+
harshal.py
|
17 |
+
course_creation.py
|
18 |
+
topics.json
|
19 |
+
new_analytics.json
|
20 |
+
new_analytics2.json
|
21 |
+
pre_class_analytics.py
|
22 |
+
sample_files/
|
README.md
CHANGED
@@ -1,13 +1,11 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: streamlit
|
7 |
sdk_version: 1.41.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
|
11 |
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: NovaScholar
|
3 |
+
emoji: 🐢
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: red
|
6 |
sdk: streamlit
|
7 |
sdk_version: 1.41.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
short_description: Generative-AI powered Flipped Classroom Learning Platform
|
11 |
---
|
|
|
|
Research Paper Attributes.txt
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Review Based Paper
|
2 |
+
Title TEXT,
|
3 |
+
Publication TEXT,
|
4 |
+
Journal_Conference TEXT,
|
5 |
+
Abstract TEXT,
|
6 |
+
Keywords TEXT,
|
7 |
+
Author TEXT
|
8 |
+
Date_of_Publication TEXT,
|
9 |
+
Intro TEXT,
|
10 |
+
Literature_Review TEXT,
|
11 |
+
Body: TEXT
|
12 |
+
Protocol: TEXT
|
13 |
+
Search String: TEXT
|
14 |
+
Included Studies: TEXT
|
15 |
+
Data Collection and Analysis Methods: TEXT
|
16 |
+
Data Extraction Table: TEXT
|
17 |
+
Synthesis and Analysis: TEXT
|
18 |
+
Conclusion
|
19 |
+
Limitations
|
20 |
+
Results
|
21 |
+
References
|
22 |
+
|
23 |
+
Risk of Bias Assessment:Opinion/Perspective Based Paper
|
24 |
+
Title TEXT,
|
25 |
+
Publication TEXT,
|
26 |
+
Journal_Conference TEXT,
|
27 |
+
Abstract TEXT,
|
28 |
+
Keywords TEXT,
|
29 |
+
Author TEXT,
|
30 |
+
Date_of_Publication TEXT,
|
31 |
+
Intro TEXT,
|
32 |
+
Literature_Review TEXT
|
33 |
+
Introduction: TEXT
|
34 |
+
Body: TEXT
|
35 |
+
Results and Discussion:TEXT
|
36 |
+
Conclusion: TEXT
|
37 |
+
References: TEXT
|
38 |
+
|
39 |
+
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
|
45 |
+
|
46 |
+
|
47 |
+
|
48 |
+
|
49 |
+
|
50 |
+
|
51 |
+
|
52 |
+
|
53 |
+
|
54 |
+
Empirical Research Paper
|
55 |
+
Title TEXT,
|
56 |
+
Publication TEXT,
|
57 |
+
Journal_Conference TEXT,
|
58 |
+
Abstract TEXT,
|
59 |
+
Keywords TEXT,
|
60 |
+
Author TEXT,
|
61 |
+
Date_of_Publication TEXT,
|
62 |
+
Intro TEXT,
|
63 |
+
Literature_Review TEXT
|
64 |
+
Introduction: TEXT
|
65 |
+
Body: TEXT
|
66 |
+
Methodology: TEXT
|
67 |
+
Participants: TEXT - Describes the sample and the sampling methods used.
|
68 |
+
Survey Instrument: TEXT - Describes the design and development of the survey questionnaire.
|
69 |
+
Data Collection: TEXT - Explains how the survey data was collected.
|
70 |
+
Data Analysis: TEXT - Details the statistical techniques used to analyze the data.
|
71 |
+
|
72 |
+
|
73 |
+
Results and Discussion:TEXT
|
74 |
+
Conclusion: TEXT
|
75 |
+
References: TEXT
|
76 |
+
Research Paper (Other)
|
77 |
+
Title TEXT,
|
78 |
+
Publication TEXT,
|
79 |
+
Journal_Conference TEXT,
|
80 |
+
Abstract TEXT,
|
81 |
+
Keywords TEXT,
|
82 |
+
Author TEXT,
|
83 |
+
Date_of_Publication TEXT,
|
84 |
+
Intro TEXT,
|
85 |
+
Literature_Review TEXT,
|
86 |
+
Research_Models_Used TEXT,
|
87 |
+
Methodology TEXT,
|
88 |
+
Discussion TEXT,
|
89 |
+
Future_Scope TEXT,
|
90 |
+
Theory TEXT,
|
91 |
+
Independent_Variables TEXT,
|
92 |
+
nof_Independent_Variables INTEGER,
|
93 |
+
Dependent_Variables TEXT,
|
94 |
+
nof_Dependent_Variables INTEGER,
|
95 |
+
Control_Variables TEXT,
|
96 |
+
Extraneous_Variables TEXT,
|
97 |
+
nof_Control_Variables INTEGER,
|
98 |
+
nof_Extraneous_Variables INTEGER
|
analytics.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from numpy.linalg import norm
|
5 |
+
from pymongo import MongoClient
|
6 |
+
import openai
|
7 |
+
from openai import OpenAI
|
8 |
+
import streamlit as st
|
9 |
+
from datetime import datetime
|
10 |
+
|
11 |
+
# MongoDB connection
|
12 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
13 |
+
|
14 |
+
client = MongoClient(MONGO_URI)
|
15 |
+
db = client['digital_nova']
|
16 |
+
themes_collection = db['themes']
|
17 |
+
corpus_collection = db['corpus']
|
18 |
+
vectors_collection = db['vectors'] # Reference to 'vectors' collection
|
19 |
+
users_collection = db['users']
|
20 |
+
|
21 |
+
# Function to create embeddings
|
22 |
+
def create_embeddings(text, openai_api_key):
|
23 |
+
client = OpenAI(api_key=openai_api_key)
|
24 |
+
response = client.embeddings.create(
|
25 |
+
input=text,
|
26 |
+
model="text-embedding-3-small"
|
27 |
+
)
|
28 |
+
return response.data[0].embedding
|
29 |
+
|
30 |
+
# Function to calculate cosine similarity
|
31 |
+
def cosine_similarity(v1, v2):
|
32 |
+
v1 = np.array(v1)
|
33 |
+
v2 = np.array(v2)
|
34 |
+
dot_product = np.dot(v1, v2)
|
35 |
+
norm_product = norm(v1) * norm(v2)
|
36 |
+
return dot_product / norm_product if norm_product != 0 else 0
|
37 |
+
|
38 |
+
def derive_analytics(goal, reference_text, openai_api_key, context=None, synoptic=None):
|
39 |
+
"""
|
40 |
+
Analyze subjective answers with respect to pre-class materials and synoptic, and provide detailed feedback
|
41 |
+
|
42 |
+
Args:
|
43 |
+
goal (str): Analysis objective
|
44 |
+
reference_text (str): Student's answer text
|
45 |
+
openai_api_key (str): OpenAI API key
|
46 |
+
context (str, optional): Pre-class material content for comparison
|
47 |
+
synoptic (str, optional): Synoptic content for evaluation
|
48 |
+
"""
|
49 |
+
template = f"""Given a student's answer to a subjective question, analyze it following these specific guidelines. Compare it with the provided pre-class materials and synoptic (if available) to assess correctness and completeness.
|
50 |
+
|
51 |
+
1. Analyze the text as an experienced educational assessor, considering:
|
52 |
+
- Conceptual understanding
|
53 |
+
- Factual accuracy
|
54 |
+
- Completeness of response
|
55 |
+
- Use of relevant terminology
|
56 |
+
- Application of concepts
|
57 |
+
|
58 |
+
2. Structure the output in markdown with two sections:
|
59 |
+
|
60 |
+
**Correctness Assessment**
|
61 |
+
- Rate overall correctness on a scale of 1-10
|
62 |
+
|
63 |
+
**Evidence-Based Feedback**
|
64 |
+
- Provide specific evidence from the student's answer to justify the score reduction
|
65 |
+
- Highlight the exact lines or phrases that need improvement
|
66 |
+
|
67 |
+
Pre-class Materials Context:
|
68 |
+
{context if context else "No reference materials provided"}
|
69 |
+
|
70 |
+
Synoptic:
|
71 |
+
{synoptic if synoptic else "No synoptic provided"}
|
72 |
+
|
73 |
+
Student's Answer:
|
74 |
+
{reference_text}
|
75 |
+
|
76 |
+
Rules:
|
77 |
+
- Base assessment strictly on provided content
|
78 |
+
- Be specific in feedback and suggestions
|
79 |
+
"""
|
80 |
+
|
81 |
+
# Initialize OpenAI client
|
82 |
+
client = OpenAI(api_key=openai_api_key)
|
83 |
+
|
84 |
+
try:
|
85 |
+
response = client.chat.completions.create(
|
86 |
+
model="gpt-4-0125-preview",
|
87 |
+
messages=[
|
88 |
+
{"role": "system", "content": "You are an educational assessment expert."},
|
89 |
+
{"role": "user", "content": template}
|
90 |
+
],
|
91 |
+
temperature=0.7
|
92 |
+
)
|
93 |
+
analysis = response.choices[0].message.content
|
94 |
+
return analysis
|
95 |
+
except Exception as e:
|
96 |
+
print(f"Error in generating analysis with OpenAI: {str(e)}")
|
97 |
+
return "Error generating analysis"
|
app.py
ADDED
@@ -0,0 +1,1424 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import streamlit as st
|
3 |
+
from datetime import datetime, date, time, timedelta
|
4 |
+
from pathlib import Path
|
5 |
+
from utils.sample_data import SAMPLE_COURSES, SAMPLE_SESSIONS
|
6 |
+
from session_page import display_session_content
|
7 |
+
from db import (
|
8 |
+
courses_collection2,
|
9 |
+
faculty_collection,
|
10 |
+
students_collection,
|
11 |
+
research_assistants_collection,
|
12 |
+
analysts_collection,
|
13 |
+
)
|
14 |
+
from werkzeug.security import generate_password_hash, check_password_hash
|
15 |
+
import os
|
16 |
+
from openai import OpenAI
|
17 |
+
from dotenv import load_dotenv
|
18 |
+
from create_course2 import create_course, courses_collection, generate_perplexity_response, generate_session_resources, PERPLEXITY_API_KEY, validate_course_plan
|
19 |
+
import json
|
20 |
+
from bson import ObjectId
|
21 |
+
client = OpenAI(api_key=os.getenv("OPENAI_KEY"))
|
22 |
+
from dotenv import load_dotenv
|
23 |
+
|
24 |
+
load_dotenv()
|
25 |
+
# PERPLEXITY_API_KEY = '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'
|
26 |
+
|
27 |
+
def get_research_papers(query):
|
28 |
+
"""Get research paper recommendations based on query"""
|
29 |
+
try:
|
30 |
+
response = client.chat.completions.create(
|
31 |
+
model="gpt-3.5-turbo",
|
32 |
+
messages=[
|
33 |
+
{
|
34 |
+
"role": "system",
|
35 |
+
"content": "You are a helpful research assistant. Provide 10 relevant research papers with titles, authors, brief descriptions, and DOI/URL links. Format each paper as: \n\n1. **Title**\nAuthors: [names]\nLink: [DOI/URL]\nDescription: [brief summary]",
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"role": "user",
|
39 |
+
"content": f"Give me 10 research papers about: {query}. Include valid DOI links or URLs to the papers where available.",
|
40 |
+
},
|
41 |
+
],
|
42 |
+
)
|
43 |
+
return response.choices[0].message.content
|
44 |
+
except Exception as e:
|
45 |
+
return f"Error getting recommendations: {str(e)}"
|
46 |
+
|
47 |
+
|
48 |
+
def analyze_research_gaps(papers):
|
49 |
+
"""Analyze gaps in research based on recommended papers"""
|
50 |
+
try:
|
51 |
+
response = client.chat.completions.create(
|
52 |
+
model="gpt-3.5-turbo",
|
53 |
+
messages=[
|
54 |
+
{
|
55 |
+
"role": "system",
|
56 |
+
"content": "You are a research analysis expert. Based on the provided papers, identify potential research gaps and future research directions.",
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"role": "user",
|
60 |
+
"content": f"Based on these papers, what are the key areas that need more research?\n\nPapers:\n{papers}",
|
61 |
+
},
|
62 |
+
],
|
63 |
+
)
|
64 |
+
return response.choices[0].message.content
|
65 |
+
except Exception as e:
|
66 |
+
return f"Error analyzing research gaps: {str(e)}"
|
67 |
+
|
68 |
+
|
69 |
+
def init_session_state():
|
70 |
+
"""Initialize session state variables"""
|
71 |
+
if "authenticated" not in st.session_state:
|
72 |
+
st.session_state.authenticated = False
|
73 |
+
if "user_id" not in st.session_state:
|
74 |
+
st.session_state.user_id = None
|
75 |
+
if "user_type" not in st.session_state:
|
76 |
+
st.session_state.user_type = None
|
77 |
+
if "username" not in st.session_state:
|
78 |
+
st.session_state.username = None
|
79 |
+
if "selected_course" not in st.session_state:
|
80 |
+
st.session_state.selected_course = None
|
81 |
+
if "show_create_course_form" not in st.session_state:
|
82 |
+
st.session_state.show_create_course_form = False
|
83 |
+
if "show_create_session_form" not in st.session_state:
|
84 |
+
st.session_state.show_create_session_form = False
|
85 |
+
if "show_enroll_course_page" not in st.session_state:
|
86 |
+
st.session_state.show_enroll_course_page = False
|
87 |
+
if "course_to_enroll" not in st.session_state:
|
88 |
+
st.session_state.course_to_enroll = None
|
89 |
+
|
90 |
+
def login_user(username, password, user_type):
|
91 |
+
"""Login user based on credentials"""
|
92 |
+
if user_type == "student":
|
93 |
+
# user = students_collection.find_one({"full_name": username}) or students_collection.find_one({"username": username})
|
94 |
+
user = students_collection.find_one({"$or": [{"full_name": username}, {"username": username}]})
|
95 |
+
elif user_type == "faculty":
|
96 |
+
user = faculty_collection.find_one({"full_name": username})
|
97 |
+
elif user_type == "research_assistant":
|
98 |
+
user = research_assistants_collection.find_one({"full_name": username})
|
99 |
+
elif user_type == "analyst":
|
100 |
+
user = analysts_collection.find_one({"full_name": username})
|
101 |
+
|
102 |
+
if user and check_password_hash(user["password"], password):
|
103 |
+
st.session_state.user_id = user["_id"]
|
104 |
+
print(st.session_state.user_id)
|
105 |
+
st.session_state.authenticated = True
|
106 |
+
st.session_state.user_type = user_type
|
107 |
+
st.session_state.username = username
|
108 |
+
return True
|
109 |
+
return False
|
110 |
+
|
111 |
+
# def login_form():
|
112 |
+
# """Display login form"""
|
113 |
+
# st.title("Welcome to NOVAScholar")
|
114 |
+
|
115 |
+
# with st.form("login_form"):
|
116 |
+
|
117 |
+
# user_type = st.selectbox(
|
118 |
+
# "Please select your Role", ["student", "faculty", "research_assistant", "analyst"]
|
119 |
+
# )
|
120 |
+
# username = st.text_input("Username")
|
121 |
+
# password = st.text_input("Password", type="password")
|
122 |
+
# submit = st.form_submit_button("Login")
|
123 |
+
|
124 |
+
# if submit:
|
125 |
+
# if login_user(username, password, user_type):
|
126 |
+
# st.success("Login successful!")
|
127 |
+
# st.rerun()
|
128 |
+
# else:
|
129 |
+
# st.error("Invalid credentials!")
|
130 |
+
def login_form():
|
131 |
+
"""Display enhanced login form"""
|
132 |
+
st.title("Welcome to NOVAScholar")
|
133 |
+
|
134 |
+
with st.form("login_form"):
|
135 |
+
# Role selection at the top
|
136 |
+
user_type = st.selectbox(
|
137 |
+
"Please select your Role",
|
138 |
+
["student", "faculty", "research_assistant", "analyst"]
|
139 |
+
)
|
140 |
+
|
141 |
+
# Username/email and password stacked vertically
|
142 |
+
username = st.text_input("Username or Email")
|
143 |
+
password = st.text_input("Password", type="password")
|
144 |
+
|
145 |
+
# Login button
|
146 |
+
submit = st.form_submit_button("Login")
|
147 |
+
|
148 |
+
if submit:
|
149 |
+
# Handle both username and email login
|
150 |
+
if '@' in username:
|
151 |
+
username = extract_username(username)
|
152 |
+
|
153 |
+
if login_user(username, password, user_type):
|
154 |
+
st.success("Login successful!")
|
155 |
+
st.rerun()
|
156 |
+
else:
|
157 |
+
st.error("Invalid credentials!")
|
158 |
+
|
159 |
+
def get_courses(username, user_type):
|
160 |
+
if user_type == "student":
|
161 |
+
student = students_collection.find_one({"$or": [{"full_name": username}, {"username": username}]})
|
162 |
+
if student:
|
163 |
+
enrolled_course_ids = [
|
164 |
+
course["course_id"] for course in student.get("enrolled_courses", [])
|
165 |
+
]
|
166 |
+
courses = courses_collection.find(
|
167 |
+
{"course_id": {"$in": enrolled_course_ids}}
|
168 |
+
)
|
169 |
+
# courses += courses_collection2.find(
|
170 |
+
# {"course_id": {"$in": enrolled_course_ids}}
|
171 |
+
# )
|
172 |
+
# # course_titles = [course['title'] for course in courses]
|
173 |
+
# return list(courses)
|
174 |
+
# courses_cursor1 = courses_collection.find(
|
175 |
+
# {"course_id": {"$in": enrolled_course_ids}}
|
176 |
+
# )
|
177 |
+
# courses_cursor2 = courses_collection2.find(
|
178 |
+
# {"course_id": {"$in": enrolled_course_ids}}
|
179 |
+
# )
|
180 |
+
# courses = list(courses_cursor1) + list(courses_cursor2)
|
181 |
+
return list(courses)
|
182 |
+
elif user_type == "faculty":
|
183 |
+
faculty = faculty_collection.find_one({"full_name": username})
|
184 |
+
if faculty:
|
185 |
+
course_ids = [
|
186 |
+
course["course_id"] for course in faculty.get("courses_taught", [])
|
187 |
+
]
|
188 |
+
# courses_1 = list(courses_collection2.find({"course_id": {"$in": course_ids}}))
|
189 |
+
courses_2 = list(courses_collection.find({"course_id": {"$in": course_ids}}))
|
190 |
+
return courses_2
|
191 |
+
elif user_type == "research_assistant":
|
192 |
+
research_assistant = research_assistants_collection.find_one(
|
193 |
+
{"full_name": username}
|
194 |
+
)
|
195 |
+
if research_assistant:
|
196 |
+
course_ids = [
|
197 |
+
course["course_id"]
|
198 |
+
for course in research_assistant.get("courses_assisted", [])
|
199 |
+
]
|
200 |
+
courses = courses_collection2.find({"course_id": {"$in": course_ids}})
|
201 |
+
return list(courses)
|
202 |
+
else:
|
203 |
+
return []
|
204 |
+
|
205 |
+
|
206 |
+
def get_course_ids():
|
207 |
+
"""Get course IDs for sample courses"""
|
208 |
+
return [course["course_id"] for course in SAMPLE_COURSES]
|
209 |
+
|
210 |
+
|
211 |
+
def get_sessions(course_id, course_title):
|
212 |
+
"""Get sessions for a given course ID"""
|
213 |
+
course = courses_collection.find_one({"course_id": course_id, "title": course_title})
|
214 |
+
if course:
|
215 |
+
return course.get("sessions", [])
|
216 |
+
return []
|
217 |
+
|
218 |
+
|
219 |
+
def create_session(new_session, course_id):
|
220 |
+
"""Create a new session for a given course ID"""
|
221 |
+
course = courses_collection2.find_one({"course_id": course_id}) | courses_collection.find_one({"course_id": course_id})
|
222 |
+
if course:
|
223 |
+
last_session_id = max((session["session_id"] for session in course["sessions"]))
|
224 |
+
last_session_id = int(last_session_id[1:])
|
225 |
+
new_session_id = last_session_id + 1
|
226 |
+
new_session["session_id"] = "S" + str(new_session_id)
|
227 |
+
courses_collection2.update_one(
|
228 |
+
{"course_id": new_session["course_id"]},
|
229 |
+
{"$push": {"sessions": new_session}},
|
230 |
+
)
|
231 |
+
return True
|
232 |
+
return False
|
233 |
+
|
234 |
+
|
235 |
+
def create_session_form(course_id):
|
236 |
+
"""Display form to create a new session and perform the creation operation"""
|
237 |
+
st.title("Create New Session")
|
238 |
+
|
239 |
+
if 'session_time' not in st.session_state:
|
240 |
+
st.session_state.session_time = datetime.now().time()
|
241 |
+
if 'show_create_session_form' not in st.session_state:
|
242 |
+
st.session_state.show_create_session_form = False
|
243 |
+
|
244 |
+
with st.form("create_session_form"):
|
245 |
+
session_title = st.text_input("Session Title")
|
246 |
+
session_date = st.date_input("Session Date", date.today(), key="session_date")
|
247 |
+
session_time = st.time_input(
|
248 |
+
"Session Time", st.session_state.session_time, key="session_time"
|
249 |
+
)
|
250 |
+
|
251 |
+
new_session_id = None
|
252 |
+
# Generate new session ID
|
253 |
+
course = courses_collection2.find_one({"course_id": course_id})
|
254 |
+
if course and "sessions" in course and course["sessions"]:
|
255 |
+
last_session_id = max(
|
256 |
+
int(session["session_id"][1:]) for session in course["sessions"]
|
257 |
+
)
|
258 |
+
new_session_id = last_session_id + 1
|
259 |
+
else:
|
260 |
+
new_session_id = 1
|
261 |
+
|
262 |
+
if st.form_submit_button("Create Session"):
|
263 |
+
clicked = True
|
264 |
+
new_session = {
|
265 |
+
"session_id": f"S{new_session_id}",
|
266 |
+
"course_id": course_id,
|
267 |
+
"title": session_title,
|
268 |
+
"date": datetime.combine(session_date, session_time),
|
269 |
+
"status": "upcoming",
|
270 |
+
"created_at": datetime.utcnow(),
|
271 |
+
"pre_class": {
|
272 |
+
"resources": [],
|
273 |
+
"completetion_required": True,
|
274 |
+
},
|
275 |
+
"in_class": {
|
276 |
+
"topics": [],
|
277 |
+
"quiz": {"title": "", "questions": 0, "duration": 0},
|
278 |
+
"polls": [],
|
279 |
+
},
|
280 |
+
"post_class": {
|
281 |
+
"assignments": [],
|
282 |
+
},
|
283 |
+
}
|
284 |
+
courses_collection2.update_one(
|
285 |
+
{"course_id": course_id}, {"$push": {"sessions": new_session}}
|
286 |
+
)
|
287 |
+
st.success("Session created successfully!")
|
288 |
+
st.session_state.show_create_session_form = False
|
289 |
+
|
290 |
+
# new_session_id = None
|
291 |
+
# creation_success = False
|
292 |
+
# # Generate new session ID
|
293 |
+
# course = courses_collection2.find_one({"course_id": course_id})
|
294 |
+
# if course and 'sessions' in course and course['sessions']:
|
295 |
+
# last_session_id = max((session['session_id'] for session in course['sessions']))
|
296 |
+
# last_session_id = int(last_session_id[1:])
|
297 |
+
# new_session_id = last_session_id + 1
|
298 |
+
# else:
|
299 |
+
# new_session_id = 1
|
300 |
+
|
301 |
+
# new_session = {
|
302 |
+
# "session_id": 'S' + new_session_id,
|
303 |
+
# "title": session_title,
|
304 |
+
# "date": datetime.datetime.combine(session_date, session_time).isoformat(),
|
305 |
+
# "status": "upcoming",
|
306 |
+
# "created_at": datetime.datetime.utcnow().isoformat(),
|
307 |
+
# "pre_class": {
|
308 |
+
# "resources": [],
|
309 |
+
# "completetion_required": True,
|
310 |
+
# },
|
311 |
+
# "in_class": {
|
312 |
+
# "topics": [],
|
313 |
+
# "quiz":
|
314 |
+
# {
|
315 |
+
# "title": '',
|
316 |
+
# "questions": 0,
|
317 |
+
# "duration": 0
|
318 |
+
# },
|
319 |
+
# "polls": []
|
320 |
+
# },
|
321 |
+
# "post_class": {
|
322 |
+
# "assignments": [],
|
323 |
+
# }
|
324 |
+
# }
|
325 |
+
# courses_collection2.update_one(
|
326 |
+
# {"course_id": course_id},
|
327 |
+
# {"$push": {"sessions": new_session}}
|
328 |
+
# )
|
329 |
+
# creation_success = True
|
330 |
+
# st.form_submit_button("Create Session")
|
331 |
+
# if creation_success == True:
|
332 |
+
# st.success("Session created successfully!")
|
333 |
+
# else:
|
334 |
+
|
335 |
+
|
336 |
+
def get_new_student_id():
|
337 |
+
"""Generate a new student ID by incrementing the last student ID"""
|
338 |
+
last_student = students_collection.find_one(sort=[("SID", -1)])
|
339 |
+
if last_student:
|
340 |
+
last_student_id = int(last_student["SID"][1:])
|
341 |
+
new_student_id = f"S{last_student_id + 1}"
|
342 |
+
else:
|
343 |
+
new_student_id = "S101"
|
344 |
+
return new_student_id
|
345 |
+
|
346 |
+
|
347 |
+
def get_new_faculty_id():
|
348 |
+
"""Generate a new faculty ID by incrementing the last faculty ID"""
|
349 |
+
last_faculty = faculty_collection.find_one(sort=[("TID", -1)])
|
350 |
+
if last_faculty:
|
351 |
+
last_faculty_id = int(last_faculty["TID"][1:])
|
352 |
+
new_faculty_id = f"T{last_faculty_id + 1}"
|
353 |
+
else:
|
354 |
+
new_faculty_id = "T101"
|
355 |
+
return new_faculty_id
|
356 |
+
|
357 |
+
|
358 |
+
def get_new_course_id():
|
359 |
+
"""Generate a new course ID by incrementing the last course ID"""
|
360 |
+
last_course = courses_collection2.find_one(sort=[("course_id", -1)])
|
361 |
+
if last_course:
|
362 |
+
last_course_id = int(last_course["course_id"][2:])
|
363 |
+
new_course_id = f"CS{last_course_id + 1}"
|
364 |
+
else:
|
365 |
+
new_course_id = "CS101"
|
366 |
+
return new_course_id
|
367 |
+
|
368 |
+
|
369 |
+
# def register_page():
|
370 |
+
# st.title("Register")
|
371 |
+
# if "user_type" not in st.session_state:
|
372 |
+
# st.session_state.user_type = "student"
|
373 |
+
|
374 |
+
# # Select user type
|
375 |
+
# st.session_state.user_type = st.selectbox(
|
376 |
+
# "Select User Type", ["student", "faculty", "research_assistant"]
|
377 |
+
# )
|
378 |
+
# user_type = st.session_state.user_type
|
379 |
+
# print(user_type)
|
380 |
+
|
381 |
+
# with st.form("register_form"):
|
382 |
+
# # user_type = st.selectbox("Select User Type", ["student", "faculty", "research_assistant"])
|
383 |
+
# # print(user_type)
|
384 |
+
# full_name = st.text_input("Full Name")
|
385 |
+
# password = st.text_input("Password", type="password")
|
386 |
+
# confirm_password = st.text_input("Confirm Password", type="password")
|
387 |
+
|
388 |
+
# if user_type == "student":
|
389 |
+
# # Fetch courses for students to select from
|
390 |
+
# courses = list(courses_collection2.find({}, {"course_id": 1, "title": 1}))
|
391 |
+
# course_options = [
|
392 |
+
# f"{course['title']} ({course['course_id']})" for course in courses
|
393 |
+
# ]
|
394 |
+
# selected_courses = st.multiselect("Available Courses", course_options)
|
395 |
+
|
396 |
+
# submit = st.form_submit_button("Register")
|
397 |
+
|
398 |
+
# if submit:
|
399 |
+
# if password == confirm_password:
|
400 |
+
# hashed_password = generate_password_hash(password)
|
401 |
+
# if user_type == "student":
|
402 |
+
# new_student_id = get_new_student_id()
|
403 |
+
# enrolled_courses = [
|
404 |
+
# {
|
405 |
+
# "course_id": course.split("(")[-1][:-1],
|
406 |
+
# "title": course.split(" (")[0],
|
407 |
+
# }
|
408 |
+
# for course in selected_courses
|
409 |
+
# ]
|
410 |
+
# students_collection.insert_one(
|
411 |
+
# {
|
412 |
+
# "SID": new_student_id,
|
413 |
+
# "full_name": full_name,
|
414 |
+
# "password": hashed_password,
|
415 |
+
# "enrolled_courses": enrolled_courses,
|
416 |
+
# "created_at": datetime.utcnow(),
|
417 |
+
# }
|
418 |
+
# )
|
419 |
+
# st.success(
|
420 |
+
# f"Student registered successfully with ID: {new_student_id}"
|
421 |
+
# )
|
422 |
+
# elif user_type == "faculty":
|
423 |
+
# new_faculty_id = get_new_faculty_id()
|
424 |
+
# faculty_collection.insert_one(
|
425 |
+
# {
|
426 |
+
# "TID": new_faculty_id,
|
427 |
+
# "full_name": full_name,
|
428 |
+
# "password": hashed_password,
|
429 |
+
# "courses_taught": [],
|
430 |
+
# "created_at": datetime.utcnow(),
|
431 |
+
# }
|
432 |
+
# )
|
433 |
+
# st.success(
|
434 |
+
# f"Faculty registered successfully with ID: {new_faculty_id}"
|
435 |
+
# )
|
436 |
+
# elif user_type == "research_assistant":
|
437 |
+
# research_assistants_collection.insert_one(
|
438 |
+
# {
|
439 |
+
# "full_name": full_name,
|
440 |
+
# "password": hashed_password,
|
441 |
+
# "created_at": datetime.utcnow(),
|
442 |
+
# }
|
443 |
+
# )
|
444 |
+
# st.success("Research Assistant registered successfully!")
|
445 |
+
# else:
|
446 |
+
# st.error("Passwords do not match")
|
447 |
+
|
448 |
+
|
449 |
+
def get_new_analyst_id():
|
450 |
+
"""Generate a new analyst ID by incrementing the last analyst ID"""
|
451 |
+
last_analyst = analysts_collection.find_one(sort=[("AID", -1)])
|
452 |
+
if last_analyst:
|
453 |
+
last_id = int(last_analyst["AID"][1:])
|
454 |
+
new_id = f"A{last_id + 1}"
|
455 |
+
else:
|
456 |
+
new_id = "A1"
|
457 |
+
return new_id
|
458 |
+
|
459 |
+
|
460 |
+
# def register_page():
|
461 |
+
# st.title("Register")
|
462 |
+
# if "user_type" not in st.session_state:
|
463 |
+
# st.session_state.user_type = "student"
|
464 |
+
|
465 |
+
# # Select user type
|
466 |
+
# st.session_state.user_type = st.selectbox(
|
467 |
+
# "Please select your Role", ["student", "faculty", "research_assistant", "analyst"]
|
468 |
+
# )
|
469 |
+
# user_type = st.session_state.user_type
|
470 |
+
# print(user_type)
|
471 |
+
|
472 |
+
# with st.form("register_form"):
|
473 |
+
|
474 |
+
# full_name = st.text_input("Full Name")
|
475 |
+
# password = st.text_input("Password", type="password")
|
476 |
+
# confirm_password = st.text_input("Confirm Password", type="password")
|
477 |
+
|
478 |
+
# if user_type == "student":
|
479 |
+
# # Fetch courses for students to select from
|
480 |
+
# courses = list(courses_collection.find({}, {"course_id": 1, "title": 1}))
|
481 |
+
# course_options = [
|
482 |
+
# f"{course['title']} ({course['course_id']})" for course in courses
|
483 |
+
# ]
|
484 |
+
# selected_courses = st.multiselect("Available Courses", course_options)
|
485 |
+
|
486 |
+
# submit = st.form_submit_button("Register")
|
487 |
+
|
488 |
+
# if submit:
|
489 |
+
# if password == confirm_password:
|
490 |
+
# hashed_password = generate_password_hash(password)
|
491 |
+
# if user_type == "student":
|
492 |
+
# new_student_id = get_new_student_id()
|
493 |
+
# enrolled_courses = [
|
494 |
+
# {
|
495 |
+
# "course_id": course.split("(")[-1][:-1],
|
496 |
+
# "title": course.split(" (")[0],
|
497 |
+
# }
|
498 |
+
# for course in selected_courses
|
499 |
+
# ]
|
500 |
+
# students_collection.insert_one(
|
501 |
+
# {
|
502 |
+
# "SID": new_student_id,
|
503 |
+
# "full_name": full_name,
|
504 |
+
# "password": hashed_password,
|
505 |
+
# "enrolled_courses": enrolled_courses,
|
506 |
+
# "created_at": datetime.utcnow(),
|
507 |
+
# }
|
508 |
+
# )
|
509 |
+
# st.success(
|
510 |
+
# f"Student registered successfully with ID: {new_student_id}"
|
511 |
+
# )
|
512 |
+
# elif user_type == "faculty":
|
513 |
+
# new_faculty_id = get_new_faculty_id()
|
514 |
+
# faculty_collection.insert_one(
|
515 |
+
# {
|
516 |
+
# "TID": new_faculty_id,
|
517 |
+
# "full_name": full_name,
|
518 |
+
# "password": hashed_password,
|
519 |
+
# "courses_taught": [],
|
520 |
+
# "created_at": datetime.utcnow(),
|
521 |
+
# }
|
522 |
+
# )
|
523 |
+
# st.success(
|
524 |
+
# f"Faculty registered successfully with ID: {new_faculty_id}"
|
525 |
+
# )
|
526 |
+
# elif user_type == "research_assistant":
|
527 |
+
# research_assistants_collection.insert_one(
|
528 |
+
# {
|
529 |
+
# "full_name": full_name,
|
530 |
+
# "password": hashed_password,
|
531 |
+
# "created_at": datetime.utcnow(),
|
532 |
+
# }
|
533 |
+
# )
|
534 |
+
# st.success("Research Assistant registered successfully!")
|
535 |
+
# elif user_type == "analyst":
|
536 |
+
# # new_analyst_id = get_new_analyst_id()
|
537 |
+
# analysts_collection.insert_one(
|
538 |
+
# {
|
539 |
+
# # "AID": new_analyst_id,
|
540 |
+
# "full_name": full_name,
|
541 |
+
# "password": hashed_password,
|
542 |
+
# "created_at": datetime.utcnow(),
|
543 |
+
# }
|
544 |
+
# )
|
545 |
+
# st.success("Analyst registered successfully!")
|
546 |
+
# else:
|
547 |
+
# st.error("Passwords do not match")
|
548 |
+
def register_page():
|
549 |
+
st.title("Register for NOVAScholar")
|
550 |
+
if "user_type" not in st.session_state:
|
551 |
+
st.session_state.user_type = "student"
|
552 |
+
|
553 |
+
# Select user type
|
554 |
+
st.session_state.user_type = st.selectbox(
|
555 |
+
"Please select your Role",
|
556 |
+
["student", "faculty", "research_assistant", "analyst"]
|
557 |
+
)
|
558 |
+
user_type = st.session_state.user_type
|
559 |
+
|
560 |
+
with st.form("register_form"):
|
561 |
+
col1, col2 = st.columns(2)
|
562 |
+
|
563 |
+
with col1:
|
564 |
+
full_name = st.text_input("Full Name")
|
565 |
+
email = st.text_input("Institutional Email")
|
566 |
+
phone = st.text_input("Phone Number")
|
567 |
+
|
568 |
+
with col2:
|
569 |
+
password = st.text_input("Password", type="password")
|
570 |
+
confirm_password = st.text_input("Confirm Password", type="password")
|
571 |
+
|
572 |
+
if user_type == "student":
|
573 |
+
courses = list(courses_collection.find({}, {"course_id": 1, "title": 1}))
|
574 |
+
course_options = [f"{course['title']} ({course['course_id']})" for course in courses]
|
575 |
+
selected_courses = st.multiselect("Available Courses", course_options)
|
576 |
+
|
577 |
+
submit = st.form_submit_button("Register")
|
578 |
+
|
579 |
+
if submit:
|
580 |
+
# Validate email
|
581 |
+
email_valid, email_msg = validate_email(email)
|
582 |
+
if not email_valid:
|
583 |
+
st.error(email_msg)
|
584 |
+
return
|
585 |
+
|
586 |
+
# Validate phone
|
587 |
+
phone_valid, phone_msg = validate_phone(phone)
|
588 |
+
if not phone_valid:
|
589 |
+
st.error(phone_msg)
|
590 |
+
return
|
591 |
+
|
592 |
+
# Validate password match
|
593 |
+
if password != confirm_password:
|
594 |
+
st.error("Passwords do not match")
|
595 |
+
return
|
596 |
+
|
597 |
+
# Extract username from email
|
598 |
+
username = extract_username(email)
|
599 |
+
|
600 |
+
# Check if username already exists
|
601 |
+
if user_type == "student":
|
602 |
+
existing_user = students_collection.find_one({"username": username})
|
603 |
+
elif user_type == "faculty":
|
604 |
+
existing_user = faculty_collection.find_one({"username": username})
|
605 |
+
elif user_type == "research_assistant":
|
606 |
+
existing_user = research_assistants_collection.find_one({"username": username})
|
607 |
+
elif user_type == "analyst":
|
608 |
+
existing_user = analysts_collection.find_one({"username": username})
|
609 |
+
|
610 |
+
if existing_user:
|
611 |
+
st.error("A user with this email already exists")
|
612 |
+
return
|
613 |
+
|
614 |
+
# Hash password and create user
|
615 |
+
hashed_password = generate_password_hash(password)
|
616 |
+
|
617 |
+
user_data = {
|
618 |
+
"username": username,
|
619 |
+
"full_name": full_name,
|
620 |
+
"email": email,
|
621 |
+
"phone": phone,
|
622 |
+
"password": hashed_password,
|
623 |
+
"created_at": datetime.utcnow()
|
624 |
+
}
|
625 |
+
|
626 |
+
if user_type == "student":
|
627 |
+
new_student_id = get_new_student_id()
|
628 |
+
enrolled_courses = [
|
629 |
+
{
|
630 |
+
"course_id": course.split("(")[-1][:-1],
|
631 |
+
"title": course.split(" (")[0],
|
632 |
+
}
|
633 |
+
for course in selected_courses
|
634 |
+
]
|
635 |
+
user_data["SID"] = new_student_id
|
636 |
+
user_data["enrolled_courses"] = enrolled_courses
|
637 |
+
students_collection.insert_one(user_data)
|
638 |
+
st.success(f"Student registered successfully! Your username is: {username}")
|
639 |
+
|
640 |
+
elif user_type == "faculty":
|
641 |
+
new_faculty_id = get_new_faculty_id()
|
642 |
+
user_data["TID"] = new_faculty_id
|
643 |
+
user_data["courses_taught"] = []
|
644 |
+
faculty_collection.insert_one(user_data)
|
645 |
+
st.success(f"Faculty registered successfully! Your username is: {username}")
|
646 |
+
|
647 |
+
elif user_type == "research_assistant":
|
648 |
+
research_assistants_collection.insert_one(user_data)
|
649 |
+
st.success(f"Research Assistant registered successfully! Your username is: {username}")
|
650 |
+
|
651 |
+
elif user_type == "analyst":
|
652 |
+
analysts_collection.insert_one(user_data)
|
653 |
+
st.success(f"Analyst registered successfully! Your username is: {username}")
|
654 |
+
|
655 |
+
# Create Course feature
|
656 |
+
# def create_course_form2(faculty_name, faculty_id):
|
657 |
+
# """Display enhanced form to create a new course with AI-generated content"""
|
658 |
+
# st.title("Create New Course")
|
659 |
+
|
660 |
+
# if 'course_plan' not in st.session_state:
|
661 |
+
# st.session_state.course_plan = None
|
662 |
+
# if 'edit_mode' not in st.session_state:
|
663 |
+
# st.session_state.edit_mode = False
|
664 |
+
|
665 |
+
# # Initial Course Creation Form
|
666 |
+
# if not st.session_state.course_plan:
|
667 |
+
# with st.form("initial_course_form"):
|
668 |
+
# col1, col2 = st.columns(2)
|
669 |
+
# with col1:
|
670 |
+
# course_name = st.text_input("Course Name", placeholder="e.g., Introduction to Computer Science")
|
671 |
+
# faculty_info = st.text_input("Faculty", value=faculty_name, disabled=True)
|
672 |
+
# with col2:
|
673 |
+
# duration_weeks = st.number_input("Duration (weeks)", min_value=1, max_value=16, value=12)
|
674 |
+
# start_date = st.date_input("Start Date")
|
675 |
+
|
676 |
+
# generate_button = st.form_submit_button("Generate Course Structure", use_container_width=True)
|
677 |
+
|
678 |
+
# if generate_button and course_name:
|
679 |
+
# with st.spinner("Generating course structure..."):
|
680 |
+
# try:
|
681 |
+
# course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name)
|
682 |
+
# # print(course_plan)
|
683 |
+
# st.session_state.course_plan = json.loads(course_plan)
|
684 |
+
# st.session_state.start_date = start_date
|
685 |
+
# st.session_state.duration_weeks = duration_weeks
|
686 |
+
# st.rerun()
|
687 |
+
# except Exception as e:
|
688 |
+
# st.error(f"Error generating course structure: {e}")
|
689 |
+
|
690 |
+
# # Display and Edit Generated Course Content
|
691 |
+
# if st.session_state.course_plan:
|
692 |
+
# with st.expander("Course Overview", expanded=True):
|
693 |
+
# if not st.session_state.edit_mode:
|
694 |
+
# st.subheader(st.session_state.course_plan['course_title'])
|
695 |
+
# st.write(st.session_state.course_plan['course_description'])
|
696 |
+
# edit_button = st.button("Edit Course Details", use_container_width=True)
|
697 |
+
# if edit_button:
|
698 |
+
# st.session_state.edit_mode = True
|
699 |
+
# st.rerun()
|
700 |
+
# else:
|
701 |
+
# with st.form("edit_course_details"):
|
702 |
+
# st.session_state.course_plan['course_title'] = st.text_input(
|
703 |
+
# "Course Title",
|
704 |
+
# value=st.session_state.course_plan['course_title']
|
705 |
+
# )
|
706 |
+
# st.session_state.course_plan['course_description'] = st.text_area(
|
707 |
+
# "Course Description",
|
708 |
+
# value=st.session_state.course_plan['course_description']
|
709 |
+
# )
|
710 |
+
# if st.form_submit_button("Save Course Details"):
|
711 |
+
# st.session_state.edit_mode = False
|
712 |
+
# st.rerun()
|
713 |
+
|
714 |
+
# # Display Modules and Sessions
|
715 |
+
# st.subheader("Course Modules and Sessions")
|
716 |
+
|
717 |
+
# start_date = st.session_state.start_date
|
718 |
+
# current_date = start_date
|
719 |
+
|
720 |
+
# all_sessions = []
|
721 |
+
# for module_idx, module in enumerate(st.session_state.course_plan['modules']):
|
722 |
+
# with st.expander(f"📚 Module {module_idx + 1}: {module['module_title']}", expanded=True):
|
723 |
+
# # Edit module title
|
724 |
+
# new_module_title = st.text_input(
|
725 |
+
# f"Module {module_idx + 1} Title",
|
726 |
+
# value=module['module_title'],
|
727 |
+
# key=f"module_{module_idx}"
|
728 |
+
# )
|
729 |
+
# module['module_title'] = new_module_title
|
730 |
+
|
731 |
+
# for sub_idx, sub_module in enumerate(module['sub_modules']):
|
732 |
+
# st.markdown(f"### 📖 {sub_module['title']}")
|
733 |
+
|
734 |
+
# # Create sessions for each topic
|
735 |
+
# for topic_idx, topic in enumerate(sub_module['topics']):
|
736 |
+
# session_key = f"session_{module_idx}_{sub_idx}_{topic_idx}"
|
737 |
+
|
738 |
+
# with st.container():
|
739 |
+
# col1, col2, col3 = st.columns([3, 2, 1])
|
740 |
+
# with col1:
|
741 |
+
# new_topic = st.text_input(
|
742 |
+
# "Topic",
|
743 |
+
# value=topic,
|
744 |
+
# key=f"{session_key}_topic"
|
745 |
+
# )
|
746 |
+
# sub_module['topics'][topic_idx] = new_topic
|
747 |
+
|
748 |
+
# with col2:
|
749 |
+
# session_date = st.date_input(
|
750 |
+
# "Session Date",
|
751 |
+
# value=current_date,
|
752 |
+
# key=f"{session_key}_date"
|
753 |
+
# )
|
754 |
+
|
755 |
+
# with col3:
|
756 |
+
# session_status = st.selectbox(
|
757 |
+
# "Status",
|
758 |
+
# options=["upcoming", "in-progress", "completed"],
|
759 |
+
# key=f"{session_key}_status"
|
760 |
+
# )
|
761 |
+
|
762 |
+
# # Create session object
|
763 |
+
# session = {
|
764 |
+
# "session_id": str(ObjectId()),
|
765 |
+
# "title": new_topic,
|
766 |
+
# "date": datetime.combine(session_date, datetime.min.time()),
|
767 |
+
# "status": session_status,
|
768 |
+
# "module_name": module['module_title'],
|
769 |
+
# "created_at": datetime.utcnow(),
|
770 |
+
# "pre_class": {
|
771 |
+
# "resources": [],
|
772 |
+
# "completion_required": True
|
773 |
+
# },
|
774 |
+
# "in_class": {
|
775 |
+
# "quiz": [],
|
776 |
+
# "polls": []
|
777 |
+
# },
|
778 |
+
# "post_class": {
|
779 |
+
# "assignments": []
|
780 |
+
# }
|
781 |
+
# }
|
782 |
+
# all_sessions.append(session)
|
783 |
+
# current_date = session_date + timedelta(days=7)
|
784 |
+
|
785 |
+
# new_course_id = get_new_course_id()
|
786 |
+
# course_title = st.session_state.course_plan['course_title']
|
787 |
+
# # Final Save Button
|
788 |
+
# if st.button("Save Course", type="primary", use_container_width=True):
|
789 |
+
# try:
|
790 |
+
# course_doc = {
|
791 |
+
# "course_id": new_course_id,
|
792 |
+
# "title": course_title,
|
793 |
+
# "description": st.session_state.course_plan['course_description'],
|
794 |
+
# "faculty": faculty_name,
|
795 |
+
# "faculty_id": faculty_id,
|
796 |
+
# "duration": f"{st.session_state.duration_weeks} weeks",
|
797 |
+
# "start_date": datetime.combine(st.session_state.start_date, datetime.min.time()),
|
798 |
+
# "created_at": datetime.utcnow(),
|
799 |
+
# "sessions": all_sessions
|
800 |
+
# }
|
801 |
+
|
802 |
+
# # Insert into database
|
803 |
+
# courses_collection.insert_one(course_doc)
|
804 |
+
|
805 |
+
# st.success("Course successfully created!")
|
806 |
+
|
807 |
+
# # Update faculty collection
|
808 |
+
# faculty_collection.update_one(
|
809 |
+
# {"_id": st.session_state.user_id},
|
810 |
+
# {
|
811 |
+
# "$push": {
|
812 |
+
# "courses_taught": {
|
813 |
+
# "course_id": new_course_id,
|
814 |
+
# "title": course_title,
|
815 |
+
# }
|
816 |
+
# }
|
817 |
+
# },
|
818 |
+
# )
|
819 |
+
|
820 |
+
# # Clear session state
|
821 |
+
# st.session_state.course_plan = None
|
822 |
+
# st.session_state.edit_mode = False
|
823 |
+
|
824 |
+
# # Optional: Add a button to view the created course
|
825 |
+
# if st.button("View Course"):
|
826 |
+
# # Add navigation logic here
|
827 |
+
# pass
|
828 |
+
|
829 |
+
# except Exception as e:
|
830 |
+
# st.error(f"Error saving course: {e}")
|
831 |
+
|
832 |
+
|
833 |
+
def remove_json_backticks(json_string):
|
834 |
+
"""Remove backticks and 'json' from the JSON object string"""
|
835 |
+
return json_string.replace("```json", "").replace("```", "").strip()
|
836 |
+
|
837 |
+
|
838 |
+
def create_course_form(faculty_name, faculty_id):
|
839 |
+
"""Display enhanced form to create a new course with AI-generated content and resources"""
|
840 |
+
|
841 |
+
st.title("Create New Course")
|
842 |
+
|
843 |
+
if 'course_plan' not in st.session_state:
|
844 |
+
st.session_state.course_plan = None
|
845 |
+
if 'edit_mode' not in st.session_state:
|
846 |
+
st.session_state.edit_mode = False
|
847 |
+
if 'resources_map' not in st.session_state:
|
848 |
+
st.session_state.resources_map = {}
|
849 |
+
if 'start_date' not in st.session_state:
|
850 |
+
st.session_state.start_date = None
|
851 |
+
if 'duration_weeks' not in st.session_state:
|
852 |
+
st.session_state.duration_weeks = None
|
853 |
+
if 'sessions_per_week' not in st.session_state:
|
854 |
+
st.session_state.sessions_per_week = None
|
855 |
+
|
856 |
+
|
857 |
+
# Initial Course Creation Form
|
858 |
+
if not st.session_state.course_plan:
|
859 |
+
with st.form("initial_course_form"):
|
860 |
+
col1, col2 = st.columns(2)
|
861 |
+
with col1:
|
862 |
+
course_name = st.text_input("Course Name", placeholder="e.g., Introduction to Computer Science")
|
863 |
+
faculty_info = st.text_input("Faculty", value=faculty_name, disabled=True)
|
864 |
+
sessions_per_week = st.number_input("Sessions Per Week", min_value=1, max_value=5, value=2)
|
865 |
+
with col2:
|
866 |
+
duration_weeks = st.number_input("Duration (weeks)", min_value=1, max_value=16, value=12)
|
867 |
+
start_date = st.date_input("Start Date")
|
868 |
+
|
869 |
+
generate_button = st.form_submit_button("Generate Course Structure", use_container_width=True)
|
870 |
+
|
871 |
+
if generate_button and course_name:
|
872 |
+
with st.spinner("Generating course structure and resources..."):
|
873 |
+
try:
|
874 |
+
# Generate course plan with resources
|
875 |
+
course_plan = generate_perplexity_response(
|
876 |
+
PERPLEXITY_API_KEY,
|
877 |
+
course_name,
|
878 |
+
duration_weeks,
|
879 |
+
sessions_per_week
|
880 |
+
)
|
881 |
+
try:
|
882 |
+
course_plan_json = json.loads(course_plan)
|
883 |
+
validate_course_plan(course_plan_json)
|
884 |
+
st.session_state.course_plan = course_plan_json
|
885 |
+
except (json.JSONDecodeError, ValueError) as e:
|
886 |
+
st.error(f"Error in course plan structure: {e}")
|
887 |
+
return
|
888 |
+
st.session_state.start_date = start_date
|
889 |
+
st.session_state.duration_weeks = duration_weeks
|
890 |
+
st.session_state.sessions_per_week = sessions_per_week
|
891 |
+
|
892 |
+
# Generate resources for all sessions
|
893 |
+
session_titles = []
|
894 |
+
for module in st.session_state.course_plan['modules']:
|
895 |
+
for sub_module in module['sub_modules']:
|
896 |
+
for topic in sub_module['topics']:
|
897 |
+
# session_titles.append(topic['title'])
|
898 |
+
# session_titles.append(topic)
|
899 |
+
if isinstance(topic, dict):
|
900 |
+
session_titles.append(topic['title'])
|
901 |
+
else:
|
902 |
+
session_titles.append(topic)
|
903 |
+
# In generate_session_resources function, add validation:
|
904 |
+
if not session_titles:
|
905 |
+
return json.dumps({"session_resources": []})
|
906 |
+
resources_response = generate_session_resources(PERPLEXITY_API_KEY, session_titles)
|
907 |
+
without_backticks = remove_json_backticks(resources_response)
|
908 |
+
resources = json.loads(without_backticks)
|
909 |
+
st.session_state.resources_map = {
|
910 |
+
resource['session_title']: resource['resources']
|
911 |
+
for resource in resources['session_resources']
|
912 |
+
}
|
913 |
+
# Add error handling for the resources map
|
914 |
+
# if st.session_state.resources_map is None:
|
915 |
+
# st.session_state.resources_map = {}
|
916 |
+
|
917 |
+
st.rerun()
|
918 |
+
except Exception as e:
|
919 |
+
st.error(f"Error generating course structure: {e}")
|
920 |
+
|
921 |
+
# Display and Edit Generated Course Content
|
922 |
+
if st.session_state.course_plan:
|
923 |
+
with st.expander("Course Overview", expanded=True):
|
924 |
+
if not st.session_state.edit_mode:
|
925 |
+
st.subheader(st.session_state.course_plan['course_title'])
|
926 |
+
st.write(st.session_state.course_plan['course_description'])
|
927 |
+
col1, col2, col3 = st.columns(3)
|
928 |
+
with col1:
|
929 |
+
st.write(f"**Start Date:** {st.session_state.start_date}")
|
930 |
+
with col2:
|
931 |
+
st.write(f"**Duration (weeks):** {st.session_state.duration_weeks}")
|
932 |
+
with col3:
|
933 |
+
st.write(f"**Sessions Per Week:** {st.session_state.sessions_per_week}")
|
934 |
+
|
935 |
+
edit_button = st.button("Edit Course Details", use_container_width=True)
|
936 |
+
if edit_button:
|
937 |
+
st.session_state.edit_mode = True
|
938 |
+
st.rerun()
|
939 |
+
else:
|
940 |
+
with st.form("edit_course_details"):
|
941 |
+
st.session_state.course_plan['course_title'] = st.text_input(
|
942 |
+
"Course Title",
|
943 |
+
value=st.session_state.course_plan['course_title']
|
944 |
+
)
|
945 |
+
st.session_state.course_plan['course_description'] = st.text_area(
|
946 |
+
"Course Description",
|
947 |
+
value=st.session_state.course_plan['course_description']
|
948 |
+
)
|
949 |
+
if st.form_submit_button("Save Course Details"):
|
950 |
+
st.session_state.edit_mode = False
|
951 |
+
st.rerun()
|
952 |
+
|
953 |
+
# Display Modules and Sessions
|
954 |
+
st.subheader("Course Modules and Sessions")
|
955 |
+
|
956 |
+
start_date = st.session_state.start_date
|
957 |
+
current_date = start_date
|
958 |
+
|
959 |
+
all_sessions = []
|
960 |
+
for module_idx, module in enumerate(st.session_state.course_plan['modules']):
|
961 |
+
with st.expander(f"📚 Module {module_idx + 1}: {module['module_title']}", expanded=True):
|
962 |
+
# Edit module title
|
963 |
+
new_module_title = st.text_input(
|
964 |
+
f"Edit Module Title",
|
965 |
+
value=module['module_title'],
|
966 |
+
key=f"module_{module_idx}"
|
967 |
+
)
|
968 |
+
module['module_title'] = new_module_title
|
969 |
+
|
970 |
+
for sub_idx, sub_module in enumerate(module['sub_modules']):
|
971 |
+
st.markdown("<br>", unsafe_allow_html=True) # Add gap between sessions
|
972 |
+
# st.markdown(f"### 📖 {sub_module['title']}")
|
973 |
+
st.markdown(f'<h3 style="font-size: 1.25rem;">📖 Chapter {sub_idx + 1}: {sub_module["title"]}</h3>', unsafe_allow_html=True)
|
974 |
+
# Possible fix:
|
975 |
+
# Inside the loop where topics are being processed:
|
976 |
+
|
977 |
+
for topic_idx, topic in enumerate(sub_module['topics']):
|
978 |
+
st.markdown("<br>", unsafe_allow_html=True) # Add gap between sessions
|
979 |
+
session_key = f"session_{module_idx}_{sub_idx}_{topic_idx}"
|
980 |
+
|
981 |
+
# Get topic title based on type
|
982 |
+
if isinstance(topic, dict):
|
983 |
+
current_topic_title = topic.get('title', '')
|
984 |
+
current_topic_display = current_topic_title
|
985 |
+
else:
|
986 |
+
current_topic_title = str(topic)
|
987 |
+
current_topic_display = current_topic_title
|
988 |
+
|
989 |
+
with st.container():
|
990 |
+
# Session Details
|
991 |
+
col1, col2, col3 = st.columns([3, 2, 1])
|
992 |
+
with col1:
|
993 |
+
new_topic = st.text_input(
|
994 |
+
f"Session {topic_idx + 1} Title",
|
995 |
+
value=current_topic_display,
|
996 |
+
key=f"{session_key}_topic"
|
997 |
+
)
|
998 |
+
# Update the topic in the data structure
|
999 |
+
if isinstance(topic, dict):
|
1000 |
+
topic['title'] = new_topic
|
1001 |
+
else:
|
1002 |
+
sub_module['topics'][topic_idx] = new_topic
|
1003 |
+
|
1004 |
+
with col2:
|
1005 |
+
session_date = st.date_input(
|
1006 |
+
"Session Date",
|
1007 |
+
value=current_date,
|
1008 |
+
key=f"{session_key}_date"
|
1009 |
+
)
|
1010 |
+
|
1011 |
+
with col3:
|
1012 |
+
session_status = st.selectbox(
|
1013 |
+
"Status",
|
1014 |
+
options=["upcoming", "in-progress", "completed"],
|
1015 |
+
key=f"{session_key}_status"
|
1016 |
+
)
|
1017 |
+
|
1018 |
+
# Display Resources
|
1019 |
+
if st.session_state.resources_map:
|
1020 |
+
# Try both the full topic title and the display title
|
1021 |
+
resources = None
|
1022 |
+
if isinstance(topic, dict) and topic.get('title') in st.session_state.resources_map:
|
1023 |
+
resources = st.session_state.resources_map[topic['title']]
|
1024 |
+
elif current_topic_title in st.session_state.resources_map:
|
1025 |
+
resources = st.session_state.resources_map[current_topic_title]
|
1026 |
+
|
1027 |
+
if resources:
|
1028 |
+
with st.container():
|
1029 |
+
# st.markdown("#### 📚 Session Resources")
|
1030 |
+
st.markdown(f'<h4 style="font-size: 1.25rem;">📚 Session Resources</h4>', unsafe_allow_html=True)
|
1031 |
+
# Readings Tab
|
1032 |
+
if resources.get('readings'):
|
1033 |
+
st.markdown(f'<h5 style="font-size: 1.1rem; margin-top: 1rem;">📖 External Resources</h5>', unsafe_allow_html=True)
|
1034 |
+
col1, col2 = st.columns(2)
|
1035 |
+
for idx, reading in enumerate(resources['readings']):
|
1036 |
+
with col1 if idx % 2 == 0 else col2:
|
1037 |
+
st.markdown(f"""
|
1038 |
+
- **{reading['title']}**
|
1039 |
+
- Type: {reading['type']}
|
1040 |
+
- Estimated reading time: {reading['estimated_read_time']}
|
1041 |
+
- [Access Resource]({reading['url']})
|
1042 |
+
""")
|
1043 |
+
|
1044 |
+
# Books Tab and Additional Resources Tab side-by-side
|
1045 |
+
col1, col2 = st.columns(2)
|
1046 |
+
|
1047 |
+
with col1:
|
1048 |
+
if resources.get('books'):
|
1049 |
+
st.markdown(f'<h5 style="font-size: 1.1rem; margin-top: 1rem;">📚 Reference Books</h5>', unsafe_allow_html=True)
|
1050 |
+
for book in resources['books']:
|
1051 |
+
with st.container():
|
1052 |
+
st.markdown(f"""
|
1053 |
+
- **{book['title']}**
|
1054 |
+
- Author: {book['author']}
|
1055 |
+
- ISBN: {book['isbn']}
|
1056 |
+
- Chapters: {book['chapters']}
|
1057 |
+
""")
|
1058 |
+
|
1059 |
+
with col2:
|
1060 |
+
if resources.get('additional_resources'):
|
1061 |
+
st.markdown(f'<h5 style="font-size: 1.1rem; margin-top: 1rem;">🔗 Additional Study Resources</h5>', unsafe_allow_html=True)
|
1062 |
+
for resource in resources['additional_resources']:
|
1063 |
+
with st.container():
|
1064 |
+
st.markdown(f"""
|
1065 |
+
- **{resource['title']}**
|
1066 |
+
- Type: {resource['type']}
|
1067 |
+
- Description: {resource['description']}
|
1068 |
+
- [Access Resource]({resource['url']})
|
1069 |
+
""")
|
1070 |
+
|
1071 |
+
# Create session object
|
1072 |
+
session = {
|
1073 |
+
"session_id": str(ObjectId()),
|
1074 |
+
"title": new_topic,
|
1075 |
+
"date": datetime.combine(session_date, datetime.min.time()),
|
1076 |
+
"status": session_status,
|
1077 |
+
"module_name": module['module_title'],
|
1078 |
+
"created_at": datetime.utcnow(),
|
1079 |
+
"pre_class": {
|
1080 |
+
"resources": [],
|
1081 |
+
"completion_required": True
|
1082 |
+
},
|
1083 |
+
"in_class": {
|
1084 |
+
"quiz": [],
|
1085 |
+
"polls": []
|
1086 |
+
},
|
1087 |
+
"post_class": {
|
1088 |
+
"assignments": []
|
1089 |
+
},
|
1090 |
+
"external_resources": st.session_state.resources_map.get(current_topic_title, {})
|
1091 |
+
}
|
1092 |
+
all_sessions.append(session)
|
1093 |
+
current_date = session_date + timedelta(days=7)
|
1094 |
+
|
1095 |
+
|
1096 |
+
new_course_id = get_new_course_id()
|
1097 |
+
course_title = st.session_state.course_plan['course_title']
|
1098 |
+
|
1099 |
+
# Final Save Button
|
1100 |
+
if st.button("Save Course", type="primary", use_container_width=True):
|
1101 |
+
try:
|
1102 |
+
course_doc = {
|
1103 |
+
"course_id": new_course_id,
|
1104 |
+
"title": course_title,
|
1105 |
+
"description": st.session_state.course_plan['course_description'],
|
1106 |
+
"faculty": faculty_name,
|
1107 |
+
"faculty_id": faculty_id,
|
1108 |
+
"duration": f"{st.session_state.duration_weeks} weeks",
|
1109 |
+
"sessions_per_week": st.session_state.sessions_per_week,
|
1110 |
+
"start_date": datetime.combine(st.session_state.start_date, datetime.min.time()),
|
1111 |
+
"created_at": datetime.utcnow(),
|
1112 |
+
"sessions": all_sessions
|
1113 |
+
}
|
1114 |
+
|
1115 |
+
# Insert into database
|
1116 |
+
courses_collection.insert_one(course_doc)
|
1117 |
+
st.success("Course successfully created!")
|
1118 |
+
|
1119 |
+
# Update faculty collection
|
1120 |
+
faculty_collection.update_one(
|
1121 |
+
{"_id": st.session_state.user_id},
|
1122 |
+
{
|
1123 |
+
"$push": {
|
1124 |
+
"courses_taught": {
|
1125 |
+
"course_id": new_course_id,
|
1126 |
+
"title": course_title,
|
1127 |
+
}
|
1128 |
+
}
|
1129 |
+
}
|
1130 |
+
)
|
1131 |
+
|
1132 |
+
# Clear session state
|
1133 |
+
st.session_state.course_plan = None
|
1134 |
+
st.session_state.edit_mode = False
|
1135 |
+
st.session_state.resources_map = {}
|
1136 |
+
|
1137 |
+
# Optional: Add a button to view the created course
|
1138 |
+
if st.button("View Course"):
|
1139 |
+
# Add navigation logic here
|
1140 |
+
pass
|
1141 |
+
|
1142 |
+
except Exception as e:
|
1143 |
+
st.error(f"Error saving course: {e}")
|
1144 |
+
|
1145 |
+
|
1146 |
+
|
1147 |
+
from research_assistant_dashboard import display_research_assistant_dashboard
from goals2 import display_analyst_dashboard

def enroll_in_course(course_id, course_title, student):
    """Enroll a student in a course"""
    if student:
        courses = student.get("enrolled_courses", [])
        if course_id not in [course["course_id"] for course in courses]:
            course = courses_collection.find_one({"course_id": course_id})
            if course:
                courses.append(
                    {
                        "course_id": course["course_id"],
                        "title": course["title"],
                    }
                )
                students_collection.update_one(
                    {"_id": st.session_state.user_id},
                    {"$set": {"enrolled_courses": courses}},
                )
                st.success(f"Enrolled in course {course_title}")
                # st.experimental_rerun()
            else:
                st.error("Course not found")
        else:
            st.warning("Already enrolled in this course")

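Aside: the update above rewrites the whole enrolled_courses array it just read, which can duplicate or drop entries if two reruns fire together. A minimal sketch of a duplicate-safe, single-query alternative, assuming the same {"course_id", "title"} sub-document shape (not part of this commit; the helper name is illustrative):

def enroll_student_atomically(students_collection, student_id, course_id, course_title):
    # $addToSet appends only when an identical sub-document is not already present
    result = students_collection.update_one(
        {"_id": student_id},
        {"$addToSet": {"enrolled_courses": {"course_id": course_id, "title": course_title}}},
    )
    # modified_count stays 0 when the student was already enrolled (or was not found)
    return result.modified_count == 1
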
# def enroll_in_course_page(course_id):
|
1174 |
+
# """Enroll a student in a course"""
|
1175 |
+
# student = students_collection.find_one({"_id": st.session_state.user_id})
|
1176 |
+
# course_title = courses_collection.find_one({"course_id": course_id})["title"]
|
1177 |
+
|
1178 |
+
# course = courses_collection.find_one({"course_id": course_id})
|
1179 |
+
# if course:
|
1180 |
+
# st.title(course["title"])
|
1181 |
+
# st.subheader("Course Description:")
|
1182 |
+
# st.write(course["description"])
|
1183 |
+
# st.write(f"Faculty: {course['faculty']}")
|
1184 |
+
# st.write(f"Duration: {course['duration']}")
|
1185 |
+
|
1186 |
+
# st.title("Course Sessions")
|
1187 |
+
# for session in course["sessions"]:
|
1188 |
+
# st.write(f"Session: {session['title']}")
|
1189 |
+
# st.write(f"Date: {session['date']}")
|
1190 |
+
# st.write(f"Status: {session['status']}")
|
1191 |
+
# st.write("----")
|
1192 |
+
# else:
|
1193 |
+
# st.error("Course not found")
|
1194 |
+
|
1195 |
+
# enroll_button = st.button("Enroll in Course", key="enroll_button", use_container_width=True)
|
1196 |
+
# if enroll_button:
|
1197 |
+
# enroll_in_course(course_id, course_title, student)
|
1198 |
+
def enroll_in_course_page(course_id):
|
1199 |
+
"""Display an aesthetically pleasing course enrollment page"""
|
1200 |
+
student = students_collection.find_one({"_id": st.session_state.user_id})
|
1201 |
+
course = courses_collection.find_one({"course_id": course_id})
|
1202 |
+
|
1203 |
+
if not course:
|
1204 |
+
st.error("Course not found")
|
1205 |
+
return
|
1206 |
+
|
1207 |
+
# Create two columns for layout
|
1208 |
+
col1, col2 = st.columns([2, 1])
|
1209 |
+
|
1210 |
+
with col1:
|
1211 |
+
# Course header section
|
1212 |
+
st.title(course["title"])
|
1213 |
+
st.markdown(f"*{course['description']}*")
|
1214 |
+
|
1215 |
+
# Course details in an expander
|
1216 |
+
with st.expander("Course Details", expanded=True):
|
1217 |
+
st.markdown(f"👨🏫 **Faculty:** {course['faculty']}")
|
1218 |
+
st.markdown(f"⏱️ **Duration:** {course['duration']}")
|
1219 |
+
|
1220 |
+
# Sessions in a clean card-like format
|
1221 |
+
st.subheader("📚 Course Sessions")
|
1222 |
+
for idx, session in enumerate(course["sessions"], 1):
|
1223 |
+
with st.container():
|
1224 |
+
st.markdown(f"""
|
1225 |
+
---
|
1226 |
+
### Session {idx}: {session['title']}
|
1227 |
+
🗓️ **Date:** {session['date']}
|
1228 |
+
📌 **Status:** {session['status']}
|
1229 |
+
""")
|
1230 |
+
|
1231 |
+
with col2:
|
1232 |
+
with st.container():
|
1233 |
+
st.markdown("### Ready to Learn?")
|
1234 |
+
st.markdown("Click below to enroll in this course")
|
1235 |
+
|
1236 |
+
# Check if already enrolled
|
1237 |
+
courses = student.get("enrolled_courses", [])
|
1238 |
+
is_enrolled = course_id in [c["course_id"] for c in courses]
|
1239 |
+
|
1240 |
+
if is_enrolled:
|
1241 |
+
st.info("✅ You are already enrolled in this course")
|
1242 |
+
else:
|
1243 |
+
enroll_button = st.button(
|
1244 |
+
"🎓 Enroll Now",
|
1245 |
+
key="enroll_button",
|
1246 |
+
use_container_width=True
|
1247 |
+
)
|
1248 |
+
if enroll_button:
|
1249 |
+
enroll_in_course(course_id, course["title"], student)
|
1250 |
+
|
1251 |
+
def show_available_courses(username, user_type, user_id):
|
1252 |
+
"""Display available courses for enrollment"""
|
1253 |
+
st.title("Available Courses")
|
1254 |
+
|
1255 |
+
courses = list(courses_collection.find({}, {"course_id": 1, "title": 1}))
|
1256 |
+
course_options = [
|
1257 |
+
f"{course['title']} ({course['course_id']})" for course in courses
|
1258 |
+
]
|
1259 |
+
|
1260 |
+
selected_course = st.selectbox("Select a Course to Enroll", course_options)
|
1261 |
+
# if selected_courses:
|
1262 |
+
# for course in selected_courses:
|
1263 |
+
# course_id = course.split("(")[-1][:-1]
|
1264 |
+
# course_title = course.split(" (")[0]
|
1265 |
+
# enroll_in_course(course_id, course_title, user_id)
|
1266 |
+
# st.success("Courses enrolled successfully!")
|
1267 |
+
if selected_course:
|
1268 |
+
course_id = selected_course.split("(")[-1][:-1]
|
1269 |
+
enroll_in_course_page(course_id)
|
1270 |
+
|
1271 |
+
def validate_email(email):
    """Validate email format and domain"""
    # Basic email pattern
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    if not re.match(pattern, email):
        return False, "Invalid email format"

    # You can add additional institution-specific validation here
    # For example, checking if the domain is from your institution
    # allowed_domains = ["spit.ac.in"]  # Add more domains as needed
    # domain = email.split('@')[1]
    # if domain not in allowed_domains:
    #     return False, "Please use your institutional email address"

    return True, "Valid email"

def validate_phone(phone):
    """Validate phone number format"""
    # Assuming Indian phone numbers
    pattern = r'^[6-9]\d{9}$'
    if not re.match(pattern, phone):
        return False, "Invalid phone number format. Please enter a 10-digit Indian mobile number"
    return True, "Valid phone number"

def extract_username(email):
    """Extract username from email"""
    return email.split('@')[0]

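The two validators above return a (bool, message) pair rather than raising, so a caller can surface every failure at once. A short usage sketch, assuming validate_email and validate_phone are in scope (the wiring below is illustrative, not the committed registration code):

import streamlit as st

def registration_inputs_valid(email: str, phone: str) -> bool:
    # report both problems in one pass instead of stopping at the first failure
    ok_email, email_msg = validate_email(email)
    ok_phone, phone_msg = validate_phone(phone)
    if not ok_email:
        st.error(email_msg)
    if not ok_phone:
        st.error(phone_msg)
    return ok_email and ok_phone

# e.g. gate the register button on both checks:
# if st.button("Register") and registration_inputs_valid(email_input, phone_input):
#     username = extract_username(email_input)
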
def main_dashboard():
|
1303 |
+
if st.session_state.user_type == "research_assistant":
|
1304 |
+
display_research_assistant_dashboard()
|
1305 |
+
elif st.session_state.user_type == "analyst":
|
1306 |
+
display_analyst_dashboard()
|
1307 |
+
else:
|
1308 |
+
selected_course_id = None
|
1309 |
+
create_session = False
|
1310 |
+
with st.sidebar:
|
1311 |
+
st.title(f"Welcome, {st.session_state.username}")
|
1312 |
+
if st.session_state.user_type == "student":
|
1313 |
+
st.title("Enrolled Courses")
|
1314 |
+
else:
|
1315 |
+
st.title("Your Courses")
|
1316 |
+
|
1317 |
+
# Course selection
|
1318 |
+
enrolled_courses = get_courses(
|
1319 |
+
st.session_state.username, st.session_state.user_type
|
1320 |
+
)
|
1321 |
+
|
1322 |
+
# Enroll in Courses
|
1323 |
+
if st.session_state.user_type == "student":
|
1324 |
+
if st.button(
|
1325 |
+
"Enroll in a New Course", key="enroll_course", use_container_width=True
|
1326 |
+
):
|
1327 |
+
st.session_state.show_enroll_course_page = True
|
1328 |
+
|
1329 |
+
# if st.session_state.show_enroll_course_form:
|
1330 |
+
# courses = list(courses_collection.find({}, {"course_id": 1, "title": 1}))
|
1331 |
+
# courses += list(courses_collection2.find({}, {"course_id": 1, "title": 1}))
|
1332 |
+
# course_options = [f"{course['title']} ({course['course_id']})" for course in courses]
|
1333 |
+
# course_to_enroll = st.selectbox("Available Courses", course_options)
|
1334 |
+
# st.session_state.course_to_enroll = course_to_enroll
|
1335 |
+
|
1336 |
+
if st.session_state.user_type == "faculty":
|
1337 |
+
if st.button(
|
1338 |
+
"Create New Course", key="create_course", use_container_width=True
|
1339 |
+
):
|
1340 |
+
st.session_state.show_create_course_form = True
|
1341 |
+
|
1342 |
+
if not enrolled_courses:
|
1343 |
+
st.warning("No courses found")
|
1344 |
+
else:
|
1345 |
+
course_titles = [course["title"] for course in enrolled_courses]
|
1346 |
+
course_ids = [course["course_id"] for course in enrolled_courses]
|
1347 |
+
|
1348 |
+
selected_course = st.selectbox("Select Course", course_titles)
|
1349 |
+
selected_course_id = course_ids[course_titles.index(selected_course)]
|
1350 |
+
print("Selected Course ID: ", selected_course_id)
|
1351 |
+
|
1352 |
+
st.session_state.selected_course = selected_course
|
1353 |
+
st.session_state.selected_course_id = selected_course_id
|
1354 |
+
|
1355 |
+
# Display course sessions
|
1356 |
+
sessions = get_sessions(selected_course_id, selected_course)
|
1357 |
+
|
1358 |
+
st.title("Course Sessions")
|
1359 |
+
for i, session in enumerate(sessions, start=1):
|
1360 |
+
if st.button(
|
1361 |
+
f"Session {i}", key=f"session_{i}", use_container_width=True
|
1362 |
+
):
|
1363 |
+
st.session_state.selected_session = session
|
1364 |
+
|
1365 |
+
if st.session_state.user_type == "faculty":
|
1366 |
+
# Create new session
|
1367 |
+
# create_session = st.button("Create New Session Button", key="create_session", use_container_width=True)
|
1368 |
+
if st.button(
|
1369 |
+
"Create New Session",
|
1370 |
+
key="create_session",
|
1371 |
+
use_container_width=True,
|
1372 |
+
):
|
1373 |
+
st.session_state.show_create_session_form = True
|
1374 |
+
|
1375 |
+
if st.button("Logout", use_container_width=True):
|
1376 |
+
for key in st.session_state.keys():
|
1377 |
+
del st.session_state[key]
|
1378 |
+
st.rerun()
|
1379 |
+
|
1380 |
+
# if create_session:
|
1381 |
+
# create_session_form(selected_course_id)
|
1382 |
+
if st.session_state.get("show_create_course_form"):
|
1383 |
+
create_course_form(st.session_state.username, st.session_state.user_id)
|
1384 |
+
elif st.session_state.get("show_create_session_form"):
|
1385 |
+
create_session_form(selected_course_id)
|
1386 |
+
elif st.session_state.get("show_enroll_course_page"):
|
1387 |
+
show_available_courses(st.session_state.username, st.session_state.user_type, st.session_state.user_id)
|
1388 |
+
else:
|
1389 |
+
# Main content
|
1390 |
+
if "selected_session" in st.session_state:
|
1391 |
+
display_session_content(
|
1392 |
+
st.session_state.user_id,
|
1393 |
+
selected_course_id,
|
1394 |
+
st.session_state.selected_session,
|
1395 |
+
st.session_state.username,
|
1396 |
+
st.session_state.user_type,
|
1397 |
+
)
|
1398 |
+
else:
|
1399 |
+
st.info("Select a session to view details")
|
1400 |
+
# # Main content
|
1401 |
+
# if 'selected_session' in st.session_state:
|
1402 |
+
# display_session_content(st.session_state.user_id, selected_course_id, st.session_state.selected_session, st.session_state.username, st.session_state.user_type)
|
1403 |
+
# if create_session:
|
1404 |
+
# create_session_form(selected_course_id)
|
1405 |
+
|
1406 |
+
|
1407 |
+
def main():
    st.set_page_config(page_title="NOVAScholar", page_icon="📚", layout="wide")
    init_session_state()
    # modify_courses_collection_schema()

    if not st.session_state.authenticated:
        login_tab, register_tab = st.tabs(["Login", "Register"])

        with register_tab:
            register_page()
        with login_tab:
            login_form()
    else:
        main_dashboard()


if __name__ == "__main__":
    main()

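main() relies on init_session_state(), which is defined earlier in app.py and not shown in this hunk. A plausible minimal initializer, consistent with the flags referenced in main_dashboard() above, is sketched below; this is an assumption, not the committed implementation, which may seed additional keys:

import streamlit as st

def init_session_state_sketch():
    defaults = {
        "authenticated": False,
        "user_id": None,
        "username": None,
        "user_type": None,  # "student", "faculty", "research_assistant", or "analyst"
        "selected_course": None,
        "selected_course_id": None,
        "show_create_course_form": False,
        "show_create_session_form": False,
        "show_enroll_course_page": False,
    }
    for key, value in defaults.items():
        # only seed missing keys so Streamlit reruns do not clobber live state
        if key not in st.session_state:
            st.session_state[key] = value
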
chatbot.py
ADDED
@@ -0,0 +1,67 @@
import streamlit as st
import datetime
from db import courses_collection2, faculty_collection, students_collection, vectors_collection, chat_history_collection
from PIL import Image
from dotenv import load_dotenv
import os
from datetime import datetime
from bson import ObjectId
from file_upload_vectorize import model
from gen_mcqs import generate_mcqs, quizzes_collection

load_dotenv()
MONGO_URI = os.getenv('MONGO_URI')
OPENAI_KEY = os.getenv('OPENAI_KEY')
GEMINI_KEY = os.getenv('GEMINI_KEY')

def insert_chat_message(user_id, session_id, role, content):
    message = {
        "role": role,
        "content": content,
        "timestamp": datetime.utcnow()
    }

    chat_history_collection.update_one(
        {"user_id": ObjectId(user_id), "session_id": session_id},
        {"$push": {"messages": message}, "$set": {"timestamp": datetime.utcnow()}},
        upsert=True
    )

def give_chat_response(user_id, session_id, question, title, description, context):
    context_prompt = f"""
    Based on the following session title, description, and context, answer the user's question in 3-4 lines:

    Title: {title}
    Description: {description}
    Context: {context}

    Question: {question}

    Please provide a clear and concise answer based on the information provided.
    """

    response = model.generate_content(context_prompt)
    if not response or not response.text:
        return "No response received from the model"

    assistant_response = response.text.strip()

    # Save the chat message
    insert_chat_message(user_id, session_id, "assistant", assistant_response)

    return assistant_response

def create_quiz_by_context(user_id, session_id, context, length, session_title, session_description):
    """Create a quiz based on the context provided"""
    quiz = generate_mcqs(context, length, session_title, session_description)
    if not quiz:
        return "No quiz generated"

    # Save the quiz
    quizzes_collection.insert_one({
        "user_id": ObjectId(user_id),
        "session_id": ObjectId(session_id),
        "questions": quiz,
        "timestamp": datetime.utcnow()
    })
    return "Quiz created successfully"
create_course.py
ADDED
@@ -0,0 +1,272 @@
1 |
+
from datetime import datetime, timedelta
|
2 |
+
import os
|
3 |
+
from typing import Dict, List, Any
|
4 |
+
from pymongo import MongoClient
|
5 |
+
import requests
|
6 |
+
import uuid
|
7 |
+
import openai
|
8 |
+
from openai import OpenAI
|
9 |
+
import streamlit as st
|
10 |
+
from bson import ObjectId
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
import json
|
13 |
+
|
14 |
+
load_dotenv()
|
15 |
+
MONGODB_URI = os.getenv("MONGO_URI")
|
16 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
|
17 |
+
OPENAI_API_KEY = os.getenv("OPENAI_KEY")
|
18 |
+
|
19 |
+
client = MongoClient(MONGODB_URI)
|
20 |
+
db = client['novascholar_db']
|
21 |
+
courses_collection = db['courses']
|
22 |
+
|
23 |
+
def generate_perplexity_response(api_key, course_name):
|
24 |
+
headers = {
|
25 |
+
"accept": "application/json",
|
26 |
+
"content-type": "application/json",
|
27 |
+
"authorization": f"Bearer {api_key}"
|
28 |
+
}
|
29 |
+
|
30 |
+
prompt = f"""
|
31 |
+
You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate comprehensive, academically rigorous course structures for undergraduate level education.
|
32 |
+
|
33 |
+
Please generate a detailed course structure for the course {course_name} in JSON format following these specifications:
|
34 |
+
|
35 |
+
1. The course structure should be appropriate for a full semester (14-16 weeks)
|
36 |
+
2. Each module should be designed for 2-4 weeks of instruction
|
37 |
+
3. Follow standard academic practices and nomenclature
|
38 |
+
4. Ensure progressive complexity from foundational to advanced concepts
|
39 |
+
5. The course_title should exactly match the course name provided in the prompt. No additional information should be included in the course_title field.
|
40 |
+
6: Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
|
41 |
+
7. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
|
42 |
+
|
43 |
+
|
44 |
+
The JSON response should follow this structure:
|
45 |
+
{{
|
46 |
+
"course_title": "string",
|
47 |
+
"course_description": "string",
|
48 |
+
"modules": [
|
49 |
+
{{
|
50 |
+
"module_title": "string",
|
51 |
+
"sub_modules": [
|
52 |
+
{{
|
53 |
+
"title": "string",
|
54 |
+
"topics": [string],
|
55 |
+
}}
|
56 |
+
]
|
57 |
+
}}
|
58 |
+
]
|
59 |
+
}}
|
60 |
+
|
61 |
+
Example response:
|
62 |
+
{{
|
63 |
+
"course_title": "Advanced Natural Language Processing",
|
64 |
+
"course_descriptio": "An advanced course covering modern approaches to NLP using deep learning, with focus on transformer architectures and their applications.",
|
65 |
+
"modules": [
|
66 |
+
{{
|
67 |
+
"module_title": "Foundations of Modern NLP",
|
68 |
+
"sub_modules": [
|
69 |
+
{{
|
70 |
+
"title": "Attention Mechanism",
|
71 |
+
"topics": [
|
72 |
+
"Self-attention",
|
73 |
+
"Multi-head attention",
|
74 |
+
"Positional encoding"
|
75 |
+
]
|
76 |
+
}}
|
77 |
+
]
|
78 |
+
}}
|
79 |
+
]
|
80 |
+
}}
|
81 |
+
"""
|
82 |
+
|
83 |
+
messages = [
|
84 |
+
{
|
85 |
+
"role": "system",
|
86 |
+
"content": (
|
87 |
+
"You are an expert educational AI assistant specializing in course design and curriculum planning. "
|
88 |
+
"Your task is to generate accurate, detailed, and structured educational content for undergraduate-level and post-graduate-level courses. "
|
89 |
+
"Provide detailed and accurate information tailored to the user's prompt."
|
90 |
+
"Ensure that the responses are logical, follow standard academic practices, and include realistic concepts relevant to the course."
|
91 |
+
),
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"role": "user",
|
95 |
+
"content": prompt
|
96 |
+
},
|
97 |
+
]
|
98 |
+
try:
|
99 |
+
client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
|
100 |
+
response = client.chat.completions.create(
|
101 |
+
model="llama-3.1-sonar-small-128k-online",
|
102 |
+
messages=messages
|
103 |
+
)
|
104 |
+
content = response.choices[0].message.content
|
105 |
+
return content
|
106 |
+
except Exception as e:
|
107 |
+
st.error(f"Failed to fetch data from Perplexity API: {e}")
|
108 |
+
return ""
|
109 |
+
|
110 |
+
def get_new_course_id():
|
111 |
+
"""Generate a new course ID by incrementing the last course ID"""
|
112 |
+
last_course = courses_collection.find_one(sort=[("course_id", -1)])
|
113 |
+
if last_course:
|
114 |
+
last_course_id = int(last_course["course_id"][2:])
|
115 |
+
new_course_id = f"CS{last_course_id + 1}"
|
116 |
+
else:
|
117 |
+
new_course_id = "CS101"
|
118 |
+
return new_course_id
|
119 |
+
|
120 |
+
|
121 |
+
def create_course(course_name, start_date, duration_weeks):
|
122 |
+
# Generate course overview
|
123 |
+
# overview_prompt = f"""Generate an overview for the undergraduate course {course_name}
|
124 |
+
# Include all relevant concepts and key topics covered in a typical curriculum.
|
125 |
+
# The response should be concise (300-400 words). Ensure that your response is in a valid JSON format."""
|
126 |
+
|
127 |
+
# overview_prompt2 = f"""Generate an overview for the undergraduate course {course_name}.
|
128 |
+
# The overview should include:
|
129 |
+
# The course title, a detailed course description,
|
130 |
+
# a division of all relevant concepts and key topics into 4-6 logical modules,
|
131 |
+
# capturing the flow and structure of a typical curriculum.
|
132 |
+
# Ensure the response adheres to the following JSON format:
|
133 |
+
# {{
|
134 |
+
# 'overview': 'string',
|
135 |
+
# 'modules': [
|
136 |
+
# {{
|
137 |
+
# 'name': 'string',
|
138 |
+
# 'description': 'string'
|
139 |
+
# }}
|
140 |
+
# ]
|
141 |
+
# }}
|
142 |
+
# overview: A detailed description of the course.
|
143 |
+
# modules: An array of 4-6 objects, each representing a logical module with a name and a brief description
|
144 |
+
# **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}"""
|
145 |
+
|
146 |
+
# course_overview = generate_perplexity_response(PERPLEXITY_API_KEY, overview_prompt2)
|
147 |
+
# # print(course_overview)
|
148 |
+
# course_overview_store = course_overview
|
149 |
+
# # print(course_overview_store)
|
150 |
+
# # Generate modules
|
151 |
+
# # modules_prompt = f"Based on this overview: {course_overview}\nCreate 4-6 logical modules for the course, each module should group related concepts and each module may include reference books if applicable"
|
152 |
+
# sub_modules_prompt = f"""Using the provided modules in the overview {course_overview_store}, generate 2-3 submodules for each module.
|
153 |
+
# Each submodule should represent a cohesive subset of the module's topics, logically organized for teaching purposes.
|
154 |
+
# Ensure the response adheres to the following JSON format:
|
155 |
+
# {
|
156 |
+
# 'modules': [
|
157 |
+
# {
|
158 |
+
# 'name': 'string',
|
159 |
+
# 'sub_modules': [
|
160 |
+
# {
|
161 |
+
# 'name': 'string',
|
162 |
+
# 'description': 'string'
|
163 |
+
# }
|
164 |
+
# ]
|
165 |
+
# }
|
166 |
+
# ]
|
167 |
+
# }
|
168 |
+
# modules: An array where each object contains the name of the module and its corresponding sub_modules.
|
169 |
+
# sub_modules: An array of 2-3 objects for each module, each having a name and a brief description."
|
170 |
+
# **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}
|
171 |
+
# """
|
172 |
+
# sub_modules = generate_perplexity_response(PERPLEXITY_API_KEY, sub_modules_prompt)
|
173 |
+
|
174 |
+
# # modules_response = generate_perplexity_response(modules_prompt)
|
175 |
+
# print(sub_modules)
|
176 |
+
|
177 |
+
# total_sessions = duration_weeks * sessions_per_week
|
178 |
+
|
179 |
+
course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name)
|
180 |
+
course_plan_json = json.loads(course_plan)
|
181 |
+
|
182 |
+
# Generate sessions for each module
|
183 |
+
all_sessions = []
|
184 |
+
for module in course_plan_json['modules']:
|
185 |
+
for sub_module in module['sub_modules']:
|
186 |
+
for topic in sub_module['topics']:
|
187 |
+
session = create_session(
|
188 |
+
title=topic,
|
189 |
+
date=start_date,
|
190 |
+
module_name=module['module_title']
|
191 |
+
)
|
192 |
+
# print(session)
|
193 |
+
all_sessions.append(session)
|
194 |
+
start_date += timedelta(days=7) # Next session after a week
|
195 |
+
|
196 |
+
# sample_sessions = [
|
197 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def25'), 'title': 'What is Generative AI?', 'date': datetime(2024, 12, 22, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
198 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def26'), 'title': 'History and Evolution of AI', 'date': datetime(2024, 12, 29, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
199 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def27'), 'title': 'Types of Generative AI (e.g., GANs, VAEs, LLMs)', 'date': datetime(2025, 1, 5, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 505626), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
200 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def28'), 'title': 'Overview of popular GenAI tools (e.g., ChatGPT, Claude, Google Gemini)', 'date': datetime(2025, 1, 12, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 506559), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
201 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def29'), 'title': 'Frameworks for building GenAI models (e.g., TensorFlow, PyTorch)', 'date': datetime(2025, 1, 19, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 506559), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
202 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def2a'), 'title': 'Integration with other AI technologies', 'date': datetime(2025, 1, 26, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 507612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
203 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def2b'), 'title': 'Text-to-text models (e.g., GPT-3, BERT)', 'date': datetime(2025, 2, 2, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 508512), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
204 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def2c'), 'title': 'Text generation for content creation and marketing', 'date': datetime(2025, 2, 9, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 508512), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
205 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def2d'), 'title': 'Chatbots and conversational interfaces', 'date': datetime(2025, 2, 16, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 509612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
206 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def2e'), 'title': 'Generative Adversarial Networks (GANs)', 'date': datetime(2025, 2, 23, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 509612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
207 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def2f'), 'title': 'Variational Autoencoders (VAEs)', 'date': datetime(2025, 3, 2, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 510612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
208 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def30'), 'title': 'Applications in art, design, and media', 'date': datetime(2025, 3, 9, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 511497), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
209 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def31'), 'title': 'Understanding prompt design principles', 'date': datetime(2025, 3, 16, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 511497), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
210 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def33'), 'title': 'Advanced techniques for fine-tuning models', 'date': datetime(2025, 3, 30, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 512514), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
211 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def34'), 'title': 'Ethical implications of AI-generated content', 'date': datetime(2025, 4, 6, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 513613), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
212 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def35'), 'title': 'Addressing bias in AI models', 'date': datetime(2025, 4, 13, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 514639), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
213 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def36'), 'title': 'Regulatory frameworks and guidelines', 'date': datetime(2025, 4, 20, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 514639), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
214 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def37'), 'title': 'Case studies from various industries (e.g., marketing, healthcare, finance)', 'date': datetime(2025, 4, 27, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 515610), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
215 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def38'), 'title': 'Success stories and challenges faced by companies using GenAI', 'date': datetime(2025, 5, 4, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 515610), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
216 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def39'), 'title': 'Guidelines for developing a GenAI project', 'date': datetime(2025, 5, 11, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 516614), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
217 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def3a'), 'title': 'Tools and resources for project implementation', 'date': datetime(2025, 5, 18, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 516614), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
218 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def3b'), 'title': 'Best practices for testing and deployment', 'date': datetime(2025, 5, 25, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 517563), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}}
|
219 |
+
# ]
|
220 |
+
|
221 |
+
# small_sample_sessions = [
|
222 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def25'), 'title': 'What is Generative AI?', 'date': datetime(2024, 12, 22, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
223 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def26'), 'title': 'History and Evolution of AI', 'date': datetime(2024, 12, 29, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
224 |
+
# ]
|
225 |
+
|
226 |
+
|
227 |
+
# print(all_sessions)
|
228 |
+
|
229 |
+
print("Number of sessions:", len(all_sessions))
|
230 |
+
# Create course document
|
231 |
+
# course_description = course_plan_json['course_description']
|
232 |
+
# course_doc = {
|
233 |
+
# "course_id": get_new_course_id(),
|
234 |
+
# "title": course_name,
|
235 |
+
# "description": course_description,
|
236 |
+
# "faculty": faculty_name,
|
237 |
+
# "faculty_id": faculty_id,
|
238 |
+
# "duration": f"{duration_weeks} weeks",
|
239 |
+
# "created_at": datetime.utcnow(),
|
240 |
+
# "sessions": all_sessions
|
241 |
+
# }
|
242 |
+
# try:
|
243 |
+
# courses_collection.insert_one(course_doc)
|
244 |
+
# except Exception as e:
|
245 |
+
# st.error(f"Failed to insert course data into the database: {e}")
|
246 |
+
|
247 |
+
# print(course_plan)
|
248 |
+
|
249 |
+
def create_session(title: str, date: datetime, module_name: str):
|
250 |
+
"""Create a session document with pre-class, in-class, and post-class components."""
|
251 |
+
return {
|
252 |
+
"session_id": ObjectId(),
|
253 |
+
"title": title,
|
254 |
+
"date": date,
|
255 |
+
"status": "upcoming",
|
256 |
+
"created_at": datetime.utcnow(),
|
257 |
+
"pre_class": {
|
258 |
+
"resources": [],
|
259 |
+
"completion_required": True
|
260 |
+
},
|
261 |
+
"in_class": {
|
262 |
+
"quiz": [],
|
263 |
+
"polls": []
|
264 |
+
},
|
265 |
+
"post_class": {
|
266 |
+
"assignments": []
|
267 |
+
}
|
268 |
+
}
|
269 |
+
|
270 |
+
# Usage example:
|
271 |
+
if __name__ == "__main__":
|
272 |
+
create_course("Introduction to Data Analytics", datetime.now(), 2)
|
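create_course() feeds the Perplexity reply straight into json.loads, and the prompt has to insist that the model omit backticks. A small defensive wrapper (a sketch, not part of the committed file) also tolerates a fenced reply:

import json

def parse_course_plan(raw: str) -> dict:
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned.split("\n", 1)[-1]  # drop the opening fence line
        if cleaned.rstrip().endswith("```"):
            cleaned = cleaned.rstrip()[:-3]   # drop the closing fence
    return json.loads(cleaned)

# course_plan_json = parse_course_plan(generate_perplexity_response(PERPLEXITY_API_KEY, course_name))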
create_course2.py
ADDED
@@ -0,0 +1,331 @@
1 |
+
from datetime import datetime, timedelta
|
2 |
+
import os
|
3 |
+
from typing import Dict, List, Any
|
4 |
+
from pymongo import MongoClient
|
5 |
+
import requests
|
6 |
+
import uuid
|
7 |
+
import openai
|
8 |
+
from openai import OpenAI
|
9 |
+
import streamlit as st
|
10 |
+
from bson import ObjectId
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
import json
|
13 |
+
|
14 |
+
load_dotenv()
|
15 |
+
MONGODB_URI = os.getenv("MONGO_URI")
|
16 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
|
17 |
+
OPENAI_API_KEY = os.getenv("OPENAI_KEY")
|
18 |
+
|
19 |
+
client = MongoClient(MONGODB_URI)
|
20 |
+
db = client['novascholar_db']
|
21 |
+
courses_collection = db['courses']
|
22 |
+
|
23 |
+
def generate_perplexity_response(api_key, course_name, duration_weeks, sessions_per_week):
|
24 |
+
headers = {
|
25 |
+
"accept": "application/json",
|
26 |
+
"content-type": "application/json",
|
27 |
+
"authorization": f"Bearer {api_key}"
|
28 |
+
}
|
29 |
+
|
30 |
+
# Calculate sessions based on duration
|
31 |
+
total_sessions = duration_weeks * sessions_per_week # Assuming 2 sessions per week
|
32 |
+
|
33 |
+
prompt = f"""
|
34 |
+
You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate a comprehensive, academically rigorous course structure for the course {course_name} that fits exactly within {duration_weeks} weeks with {total_sessions} total sessions ({sessions_per_week} sessions per week).
|
35 |
+
|
36 |
+
Please generate a detailed course structure in JSON format following these specifications:
|
37 |
+
|
38 |
+
1. The course structure must be designed for exactly {duration_weeks} weeks with {total_sessions} total sessions
|
39 |
+
2. Each module should contain an appropriate number of sessions that sum up to exactly {total_sessions}
|
40 |
+
3. Each session should be designed for a 1-1.5-hour class duration
|
41 |
+
4. Follow standard academic practices and nomenclature
|
42 |
+
5. Ensure progressive complexity from foundational to advanced concepts
|
43 |
+
6. The course_title should exactly match the course name provided
|
44 |
+
7. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
|
45 |
+
8. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
|
46 |
+
|
47 |
+
The JSON response should follow this structure:
|
48 |
+
{{
|
49 |
+
"course_title": "string",
|
50 |
+
"course_description": "string",
|
51 |
+
"total_duration_weeks": {duration_weeks},
|
52 |
+
"sessions_per_week": {sessions_per_week},
|
53 |
+
"total_sessions": {total_sessions},
|
54 |
+
"modules": [
|
55 |
+
{{
|
56 |
+
"module_title": "string",
|
57 |
+
"module_duration_sessions": number,
|
58 |
+
"sub_modules": [
|
59 |
+
{{
|
60 |
+
"title": "string",
|
61 |
+
"topics": [
|
62 |
+
{{
|
63 |
+
"title": "string",
|
64 |
+
"short_description": "string",
|
65 |
+
"concise_learning_objectives": ["string"]
|
66 |
+
}}
|
67 |
+
]
|
68 |
+
}}
|
69 |
+
]
|
70 |
+
}}
|
71 |
+
]
|
72 |
+
}}
|
73 |
+
|
74 |
+
Ensure that:
|
75 |
+
1. The sum of all module_duration_sessions equals exactly {total_sessions}
|
76 |
+
2. Each topic has clear learning objectives
|
77 |
+
3. Topics build upon each other logically
|
78 |
+
4. Content is distributed evenly across the available sessions
|
79 |
+
5. **This Instruction is Strictly followed: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.****
|
80 |
+
|
81 |
+
"""
|
82 |
+
|
83 |
+
messages = [
|
84 |
+
{
|
85 |
+
"role": "system",
|
86 |
+
"content": (
|
87 |
+
"You are an expert educational AI assistant specializing in course design and curriculum planning. "
|
88 |
+
"Your task is to generate accurate, detailed, and structured educational content that precisely fits "
|
89 |
+
"the specified duration."
|
90 |
+
),
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"role": "user",
|
94 |
+
"content": prompt
|
95 |
+
},
|
96 |
+
]
|
97 |
+
|
98 |
+
try:
|
99 |
+
client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
|
100 |
+
response = client.chat.completions.create(
|
101 |
+
model="llama-3.1-sonar-small-128k-online",
|
102 |
+
messages=messages
|
103 |
+
)
|
104 |
+
content = response.choices[0].message.content
|
105 |
+
|
106 |
+
# Validate session count
|
107 |
+
course_plan = json.loads(content)
|
108 |
+
total_planned_sessions = sum(
|
109 |
+
module.get('module_duration_sessions', 0)
|
110 |
+
for module in course_plan.get('modules', [])
|
111 |
+
)
|
112 |
+
|
113 |
+
if abs(total_planned_sessions - total_sessions) > 5:
|
114 |
+
raise ValueError(f"Generated plan has {total_planned_sessions} sessions, but {total_sessions} were requested")
|
115 |
+
|
116 |
+
return content
|
117 |
+
except Exception as e:
|
118 |
+
st.error(f"Failed to fetch data from Perplexity API: {e}")
|
119 |
+
return ""
|
120 |
+
|
121 |
+
def generate_session_resources(api_key, session_titles: List[str]):
|
122 |
+
"""
|
123 |
+
Generate relevant resources for each session title separately
|
124 |
+
"""
|
125 |
+
resources_prompt = f"""
|
126 |
+
You are an expert educational content curator. For each session title provided, suggest highly relevant and accurate learning resources.
|
127 |
+
Please provide resources for these sessions: {session_titles}
|
128 |
+
|
129 |
+
For each session, provide resources in this JSON format:
|
130 |
+
{{
|
131 |
+
"session_resources": [
|
132 |
+
{{
|
133 |
+
"session_title": "string",
|
134 |
+
"resources": {{
|
135 |
+
"readings": [
|
136 |
+
{{
|
137 |
+
"title": "string",
|
138 |
+
"url": "string",
|
139 |
+
"type": "string",
|
140 |
+
"estimated_read_time": "string"
|
141 |
+
}}
|
142 |
+
],
|
143 |
+
"books": [
|
144 |
+
{{
|
145 |
+
"title": "string",
|
146 |
+
"author": "string",
|
147 |
+
"isbn": "string",
|
148 |
+
"chapters": "string"
|
149 |
+
}}
|
150 |
+
],
|
151 |
+
"additional_resources": [
|
152 |
+
{{
|
153 |
+
"title": "string",
|
154 |
+
"url": "string",
|
155 |
+
"type": "string",
|
156 |
+
"description": "string"
|
157 |
+
}}
|
158 |
+
]
|
159 |
+
}}
|
160 |
+
}}
|
161 |
+
]
|
162 |
+
}}
|
163 |
+
|
164 |
+
Guidelines:
|
165 |
+
1. Ensure all URLs are real and currently active
|
166 |
+
2. Prioritize high-quality, authoritative sources
|
167 |
+
3. Include 1-2 resources of each type
|
168 |
+
4. For readings, include a mix of academic and practical resources; this may extend to 3-4 readings
|
169 |
+
5. Book references should be real, recently published works
|
170 |
+
6. Additional resources can include tools, documentation, or practice platforms
|
171 |
+
7. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
|
172 |
+
8. ***NOTE: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
|
173 |
+
"""
|
174 |
+
|
175 |
+
messages = [
|
176 |
+
{
|
177 |
+
"role": "system",
|
178 |
+
"content": "You are an expert educational content curator, focused on providing accurate and relevant learning resources.",
|
179 |
+
},
|
180 |
+
{
|
181 |
+
"role": "user",
|
182 |
+
"content": resources_prompt
|
183 |
+
},
|
184 |
+
]
|
185 |
+
|
186 |
+
try:
|
187 |
+
client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
|
188 |
+
response = client.chat.completions.create(
|
189 |
+
model="llama-3.1-sonar-small-128k-online",
|
190 |
+
messages=messages
|
191 |
+
)
|
192 |
+
print("Response is: \n", response.choices[0].message.content)
|
193 |
+
# try:
|
194 |
+
# return json.loads(response.choices[0].message.content)
|
195 |
+
# except json.JSONDecodeError as e:
|
196 |
+
# st.error(f"Failed to decode JSON response: {e}")
|
197 |
+
# return None
|
198 |
+
return response.choices[0].message.content
|
199 |
+
except Exception as e:
|
200 |
+
st.error(f"Failed to generate resources: {e}")
|
201 |
+
return None
|
202 |
+
|
203 |
+
def validate_course_plan(course_plan):
|
204 |
+
required_fields = ['course_title', 'course_description', 'modules']
|
205 |
+
if not all(field in course_plan for field in required_fields):
|
206 |
+
raise ValueError("Invalid course plan structure")
|
207 |
+
|
208 |
+
for module in course_plan['modules']:
|
209 |
+
if 'module_title' not in module or 'sub_modules' not in module:
|
210 |
+
raise ValueError("Invalid module structure")
|
211 |
+
|
212 |
+
def create_session(title: str, date: datetime, module_name: str, resources: dict):
|
213 |
+
"""Create a session document with pre-class, in-class, and post-class components."""
|
214 |
+
return {
|
215 |
+
"session_id": ObjectId(),
|
216 |
+
"title": title,
|
217 |
+
"date": date,
|
218 |
+
"status": "upcoming",
|
219 |
+
"created_at": datetime.utcnow(),
|
220 |
+
"module_name": module_name,
|
221 |
+
"pre_class": {
|
222 |
+
"resources": [],
|
223 |
+
"completion_required": True
|
224 |
+
},
|
225 |
+
"in_class": {
|
226 |
+
"quiz": [],
|
227 |
+
"polls": []
|
228 |
+
},
|
229 |
+
"post_class": {
|
230 |
+
"assignments": []
|
231 |
+
},
|
232 |
+
"external_resources": {
|
233 |
+
"readings": resources.get("readings", []),
|
234 |
+
"books": resources.get("books", []),
|
235 |
+
"additional_resources": resources.get("additional_resources", [])
|
236 |
+
}
|
237 |
+
}
|
238 |
+
|
239 |
+
def create_course(course_name: str, start_date: datetime, duration_weeks: int, sessions_per_week: int):
|
240 |
+
# First generate a course plan using Perplexity API
|
241 |
+
# course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name, duration_weeks, sessions_per_week)
|
242 |
+
# course_plan_json = json.loads(course_plan)
|
243 |
+
|
244 |
+
# print("Course Structure is: \n", course_plan_json);
|
245 |
+
|
246 |
+
# Earlier Code:
|
247 |
+
# Generate sessions for each module with resources
|
248 |
+
# all_sessions = []
|
249 |
+
# current_date = start_date
|
250 |
+
|
251 |
+
# for module in course_plan_json['modules']:
|
252 |
+
# for sub_module in module['sub_modules']:
|
253 |
+
# for topic in sub_module['topics']:
|
254 |
+
# session = create_session(
|
255 |
+
# title=topic['title'],
|
256 |
+
# date=current_date,
|
257 |
+
# module_name=module['module_title'],
|
258 |
+
# resources=topic['resources']
|
259 |
+
# )
|
260 |
+
# all_sessions.append(session)
|
261 |
+
# current_date += timedelta(days=3.5) # Spacing sessions evenly across the week
|
262 |
+
|
263 |
+
# return course_plan_json, all_sessions
|
264 |
+
|
265 |
+
# New Code:
|
266 |
+
# Extract all session titles
|
267 |
+
session_titles = []
|
268 |
+
# Load the course plan JSON
|
269 |
+
course_plan_json = {}
|
270 |
+
with open('sample_files/sample_course.json', 'r') as file:
|
271 |
+
course_plan_json = json.load(file)
|
272 |
+
|
273 |
+
for module in course_plan_json['modules']:
|
274 |
+
for sub_module in module['sub_modules']:
|
275 |
+
for topic in sub_module['topics']:
|
276 |
+
session_titles.append(topic['title'])
|
277 |
+
|
278 |
+
# Generate resources for all sessions
|
279 |
+
session_resources = generate_session_resources(PERPLEXITY_API_KEY, session_titles)
|
280 |
+
# print("Session Resources are: \n", session_resources)
|
281 |
+
resources = json.loads(session_resources)
|
282 |
+
# print("Resources JSON is: \n", resources_json)
|
283 |
+
|
284 |
+
# print("Session Resources are: \n", session_resources)
|
285 |
+
|
286 |
+
# Create a mapping of session titles to their resources
|
287 |
+
|
288 |
+
# Import Resources JSON
|
289 |
+
# resources = {}
|
290 |
+
# with open('sample_files/sample_course_resources.json', 'r') as file:
|
291 |
+
# resources = json.load(file)
|
292 |
+
|
293 |
+
resources_map = {
|
294 |
+
resource['session_title']: resource['resources']
|
295 |
+
for resource in resources['session_resources']
|
296 |
+
}
|
297 |
+
print("Resources Map is: \n", resources_map)
|
298 |
+
# print("Sample is: ", resources_map.get('Overview of ML Concepts, History, and Applications'));
|
299 |
+
# Generate sessions with their corresponding resources
|
300 |
+
all_sessions = []
|
301 |
+
current_date = start_date
|
302 |
+
|
303 |
+
for module in course_plan_json['modules']:
|
304 |
+
for sub_module in module['sub_modules']:
|
305 |
+
for topic in sub_module['topics']:
|
306 |
+
session = create_session(
|
307 |
+
title=topic['title'],
|
308 |
+
date=current_date,
|
309 |
+
module_name=module['module_title'],
|
310 |
+
resources=resources_map.get(topic['title'], {})
|
311 |
+
)
|
312 |
+
all_sessions.append(session)
|
313 |
+
current_date += timedelta(days=3.5)
|
314 |
+
|
315 |
+
print("All Sessions are: \n", all_sessions)
|
316 |
+
|
317 |
+
def get_new_course_id():
|
318 |
+
"""Generate a new course ID by incrementing the last course ID"""
|
319 |
+
last_course = courses_collection.find_one(sort=[("course_id", -1)])
|
320 |
+
if last_course:
|
321 |
+
last_course_id = int(last_course["course_id"][2:])
|
322 |
+
new_course_id = f"CS{last_course_id + 1}"
|
323 |
+
else:
|
324 |
+
new_course_id = "CS101"
|
325 |
+
return new_course_id
|
326 |
+
|
327 |
+
# if __name__ == "__main__":
|
328 |
+
# course_name = "Introduction to Machine Learning"
|
329 |
+
# start_date = datetime(2022, 9, 1)
|
330 |
+
# duration_weeks = 4
|
331 |
+
# create_course(course_name, start_date, duration_weeks, 3)
|
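get_new_course_id() in both create_course modules picks the "last" course by sorting course_id strings lexicographically, so "CS999" would still sort above "CS1000" once the ids grow a digit. A numerically safe variant (a sketch under that assumption, not the committed implementation):

import re

def next_course_id(courses_collection, prefix="CS", start=101):
    # compare the numeric suffix instead of the raw string, so CS1000 > CS999
    max_num = None
    for doc in courses_collection.find({}, {"course_id": 1}):
        match = re.fullmatch(rf"{prefix}(\d+)", str(doc.get("course_id", "")))
        if match:
            num = int(match.group(1))
            max_num = num if max_num is None else max(max_num, num)
    return f"{prefix}{start}" if max_num is None else f"{prefix}{max_num + 1}"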
db.py
ADDED
@@ -0,0 +1,696 @@
1 |
+
# Setup for MongoDB
|
2 |
+
from pymongo import MongoClient
|
3 |
+
from datetime import datetime
|
4 |
+
from werkzeug.security import generate_password_hash
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
load_dotenv()
|
9 |
+
MONGO_URI = os.getenv("MONGO_URI")
|
10 |
+
|
11 |
+
client = MongoClient(MONGO_URI)
|
12 |
+
try:
|
13 |
+
client.admin.command("ping")
|
14 |
+
print("MongoDB connection successful")
|
15 |
+
except Exception as e:
|
16 |
+
print(f"MongoDB connection failed: {e}")
|
17 |
+
|
18 |
+
db = client["novascholar_db"]
|
19 |
+
|
20 |
+
########
|
21 |
+
# Research Assistant Schema
|
22 |
+
research_assistant_schema = {
|
23 |
+
"bsonType": "object",
|
24 |
+
"required": ["full_name", "password", "email", "courses_assisted"],
|
25 |
+
"properties": {
|
26 |
+
"full_name": {
|
27 |
+
"bsonType": "string",
|
28 |
+
"description": "Full name of the research assistant",
|
29 |
+
},
|
30 |
+
"password": {
|
31 |
+
"bsonType": "string",
|
32 |
+
"description": "Hashed password of the research assistant",
|
33 |
+
},
|
34 |
+
"email": {
|
35 |
+
"bsonType": "string",
|
36 |
+
"description": "Email address of the research assistant",
|
37 |
+
},
|
38 |
+
"courses_assisted": {
|
39 |
+
"bsonType": "array",
|
40 |
+
"description": "List of courses the research assistant is assisting",
|
41 |
+
"items": {
|
42 |
+
"bsonType": "object",
|
43 |
+
"required": ["course_id"],
|
44 |
+
"properties": {
|
45 |
+
"course_id": {
|
46 |
+
"bsonType": "string",
|
47 |
+
"description": "ID of the course",
|
48 |
+
}
|
49 |
+
},
|
50 |
+
},
|
51 |
+
},
|
52 |
+
},
|
53 |
+
}
|
54 |
+
|
55 |
+
# Create research assistants collection
|
56 |
+
research_assistants_collection = db["research_assistants"]
|
57 |
+
|
58 |
+
# Create indexes
|
59 |
+
research_assistants_collection.create_index("full_name", unique=True)
|
60 |
+
research_assistants_collection.create_index("email", unique=True)
|
61 |
+
|
62 |
+
|
63 |
+
# Optional: Sample data insertion function
|
64 |
+
def insert_sample_research_assistants():
|
65 |
+
sample_research_assistants = [
|
66 |
+
{
|
67 |
+
"full_name": "John Doe RA",
|
68 |
+
"password": generate_password_hash("password123"),
|
69 |
+
"email": "[email protected]",
|
70 |
+
"courses_assisted": [{"course_id": "CS101"}, {"course_id": "CS102"}],
|
71 |
+
}
|
72 |
+
]
|
73 |
+
|
74 |
+
try:
|
75 |
+
research_assistants_collection.insert_many(sample_research_assistants)
|
76 |
+
print("Sample research assistants inserted successfully!")
|
77 |
+
except Exception as e:
|
78 |
+
print(f"Error inserting sample research assistants: {e}")
|
79 |
+
|
80 |
+
|
81 |
+
###########
|
82 |
+
|
83 |
+
###############
|
84 |
+
# Add after research assistant schema
|
85 |
+
|
86 |
+
# Analyst Schema
|
87 |
+
analyst_schema = {
|
88 |
+
"bsonType": "object",
|
89 |
+
"required": ["full_name", "password", "email", "courses_analyzed"],
|
90 |
+
"properties": {
|
91 |
+
"full_name": {"bsonType": "string", "description": "Full name of the analyst"},
|
92 |
+
"password": {
|
93 |
+
"bsonType": "string",
|
94 |
+
"description": "Hashed password of the analyst",
|
95 |
+
},
|
96 |
+
"email": {"bsonType": "string", "description": "Email address of the analyst"},
|
97 |
+
"courses_analyzed": {
|
98 |
+
"bsonType": "array",
|
99 |
+
"description": "List of courses the analyst is analyzing",
|
100 |
+
"items": {
|
101 |
+
"bsonType": "object",
|
102 |
+
"required": ["course_id"],
|
103 |
+
"properties": {
|
104 |
+
"course_id": {
|
105 |
+
"bsonType": "string",
|
106 |
+
"description": "ID of the course",
|
107 |
+
}
|
108 |
+
},
|
109 |
+
},
|
110 |
+
},
|
111 |
+
},
|
112 |
+
}
|
113 |
+
|
114 |
+
# Create analysts collection
|
115 |
+
analysts_collection = db["analysts"]
|
116 |
+
|
117 |
+
# Create indexes for analysts
|
118 |
+
analysts_collection.create_index("full_name", unique=True)
|
119 |
+
analysts_collection.create_index("email", unique=True)
|
120 |
+
|
121 |
+
|
122 |
+
def insert_sample_analysts():
|
123 |
+
sample_analysts = [
|
124 |
+
{
|
125 |
+
"full_name": "jane",
|
126 |
+
"password": generate_password_hash("jane"),
|
127 |
+
"email": "[email protected]",
|
128 |
+
"courses_analyzed": [{"course_id": "CS101"}, {"course_id": "CS102"}],
|
129 |
+
}
|
130 |
+
]
|
131 |
+
|
132 |
+
try:
|
133 |
+
analysts_collection.insert_many(sample_analysts)
|
134 |
+
print("Sample analysts inserted successfully!")
|
135 |
+
except Exception as e:
|
136 |
+
print(f"Error inserting sample analysts: {e}")
|
137 |
+
|
138 |
+
|
139 |
+
##############@
|
140 |
+
|
141 |
+
|
142 |
+
# Define the course schema
|
143 |
+
course_schema = {
|
144 |
+
"bsonType": "object",
|
145 |
+
"required": [
|
146 |
+
"course_id",
|
147 |
+
"title",
|
148 |
+
"description",
|
149 |
+
"faculty",
|
150 |
+
"faculty_id",
|
151 |
+
"duration",
|
152 |
+
"created_at",
|
153 |
+
],
|
154 |
+
"properties": {
|
155 |
+
"course_id": {
|
156 |
+
"bsonType": "string",
|
157 |
+
"description": "Unique identifier for the course",
|
158 |
+
},
|
159 |
+
"title": {"bsonType": "string", "description": "Title of the course"},
|
160 |
+
"description": {
|
161 |
+
"bsonType": "string",
|
162 |
+
"description": "Description of the course",
|
163 |
+
},
|
164 |
+
"faculty": {"bsonType": "string", "description": "Name of the faculty"},
|
165 |
+
"duration": {"bsonType": "string", "description": "Duration of the course"},
|
166 |
+
"created_at": {
|
167 |
+
"bsonType": "date",
|
168 |
+
"description": "Date when the course was created",
|
169 |
+
},
|
170 |
+
"sessions": {
|
171 |
+
"bsonType": "array",
|
172 |
+
"description": "List of sessions associated with the course",
|
173 |
+
"items": {
|
174 |
+
"bsonType": "object",
|
175 |
+
"required": ["session_id", "title", "date", "status", "created_at"],
|
176 |
+
"properties": {
|
177 |
+
"session_id": {
|
178 |
+
"bsonType": "string",
|
179 |
+
"description": "Unique identifier for the session",
|
180 |
+
},
|
181 |
+
"title": {
|
182 |
+
"bsonType": "string",
|
183 |
+
"description": "Title of the session",
|
184 |
+
},
|
185 |
+
"date": {"bsonType": "date", "description": "Date of the session"},
|
186 |
+
"status": {
|
187 |
+
"bsonType": "string",
|
188 |
+
"description": "Status of the session (e.g., completed, upcoming)",
|
189 |
+
},
|
190 |
+
"created_at": {
|
191 |
+
"bsonType": "date",
|
192 |
+
"description": "Date when the session was created",
|
193 |
+
},
|
194 |
+
"pre_class": {
|
195 |
+
"bsonType": "object",
|
196 |
+
"description": "Pre-class segment data",
|
197 |
+
"properties": {
|
198 |
+
"resources": {
|
199 |
+
"bsonType": "array",
|
200 |
+
"description": "List of pre-class resources",
|
201 |
+
"items": {
|
202 |
+
"bsonType": "object",
|
203 |
+
"required": ["type", "title", "url"],
|
204 |
+
"properties": {
|
205 |
+
"type": {
|
206 |
+
"bsonType": "string",
|
207 |
+
"description": "Type of resource (e.g., pdf, video)",
|
208 |
+
},
|
209 |
+
"title": {
|
210 |
+
"bsonType": "string",
|
211 |
+
"description": "Title of the resource",
|
212 |
+
},
|
213 |
+
"url": {
|
214 |
+
"bsonType": "string",
|
215 |
+
"description": "URL of the resource",
|
216 |
+
},
|
217 |
+
"vector": {
|
218 |
+
"bsonType": "array",
|
219 |
+
"description": "Vector representation of the resource",
|
220 |
+
"items": {"bsonType": "double"},
|
221 |
+
},
|
222 |
+
},
|
223 |
+
},
|
224 |
+
},
|
225 |
+
"completion_required": {
|
226 |
+
"bsonType": "bool",
|
227 |
+
"description": "Indicates if completion of pre-class resources is required",
|
228 |
+
},
|
229 |
+
},
|
230 |
+
},
|
231 |
+
"in_class": {
|
232 |
+
"bsonType": "object",
|
233 |
+
"description": "In-class segment data",
|
234 |
+
"properties": {
|
235 |
+
"topics": {
|
236 |
+
"bsonType": "array",
|
237 |
+
"description": "List of topics covered in the session",
|
238 |
+
"items": {"bsonType": "string"},
|
239 |
+
},
|
240 |
+
"quiz": {
|
241 |
+
"bsonType": "object",
|
242 |
+
"description": "Quiz data",
|
243 |
+
"properties": {
|
244 |
+
"title": {
|
245 |
+
"bsonType": "string",
|
246 |
+
"description": "Title of the quiz",
|
247 |
+
},
|
248 |
+
"questions": {
|
249 |
+
"bsonType": "int",
|
250 |
+
"description": "Number of questions in the quiz",
|
251 |
+
},
|
252 |
+
"duration": {
|
253 |
+
"bsonType": "int",
|
254 |
+
"description": "Duration of the quiz in minutes",
|
255 |
+
},
|
256 |
+
},
|
257 |
+
},
|
258 |
+
"polls": {
|
259 |
+
"bsonType": "array",
|
260 |
+
"description": "List of polls conducted during the session",
|
261 |
+
"items": {
|
262 |
+
"bsonType": "object",
|
263 |
+
"required": ["question", "options"],
|
264 |
+
"properties": {
|
265 |
+
"question": {
|
266 |
+
"bsonType": "string",
|
267 |
+
"description": "Poll question",
|
268 |
+
},
|
269 |
+
"options": {
|
270 |
+
"bsonType": "array",
|
271 |
+
"description": "List of poll options",
|
272 |
+
"items": {"bsonType": "string"},
|
273 |
+
},
|
274 |
+
"responses": {
|
275 |
+
"bsonType": "object",
|
276 |
+
"description": "Responses to the poll",
|
277 |
+
"additionalProperties": {"bsonType": "int"},
|
278 |
+
},
|
279 |
+
},
|
280 |
+
},
|
281 |
+
},
|
282 |
+
},
|
283 |
+
},
|
284 |
+
"post_class": {
|
285 |
+
"bsonType": "object",
|
286 |
+
"description": "Post-class segment data",
|
287 |
+
"properties": {
|
288 |
+
"assignments": {
|
289 |
+
"bsonType": "array",
|
290 |
+
"description": "List of assignments",
|
291 |
+
"items": {
|
292 |
+
"bsonType": "object",
|
293 |
+
"required": ["id", "title", "due_date", "status"],
|
294 |
+
"properties": {
|
295 |
+
"id": {
|
296 |
+
"bsonType": "int",
|
297 |
+
"description": "Assignment ID",
|
298 |
+
},
|
299 |
+
"title": {
|
300 |
+
"bsonType": "string",
|
301 |
+
"description": "Title of the assignment",
|
302 |
+
},
|
303 |
+
"due_date": {
|
304 |
+
"bsonType": "date",
|
305 |
+
"description": "Due date of the assignment",
|
306 |
+
},
|
307 |
+
"status": {
|
308 |
+
"bsonType": "string",
|
309 |
+
"description": "Status of the assignment (e.g., pending, completed)",
|
310 |
+
},
|
311 |
+
"submissions": {
|
312 |
+
"bsonType": "array",
|
313 |
+
"description": "List of submissions",
|
314 |
+
"items": {
|
315 |
+
"bsonType": "object",
|
316 |
+
"required": [
|
317 |
+
"student_id",
|
318 |
+
"file_url",
|
319 |
+
"submitted_at",
|
320 |
+
],
|
321 |
+
"properties": {
|
322 |
+
"student_id": {
|
323 |
+
"bsonType": "string",
|
324 |
+
"description": "ID of the student who submitted the assignment",
|
325 |
+
},
|
326 |
+
"file_url": {
|
327 |
+
"bsonType": "string",
|
328 |
+
"description": "URL of the submitted file",
|
329 |
+
},
|
330 |
+
"submitted_at": {
|
331 |
+
"bsonType": "date",
|
332 |
+
"description": "Date when the assignment was submitted",
|
333 |
+
},
|
334 |
+
},
|
335 |
+
},
|
336 |
+
},
|
337 |
+
},
|
338 |
+
},
|
339 |
+
}
|
340 |
+
},
|
341 |
+
},
|
342 |
+
},
|
343 |
+
},
|
344 |
+
},
|
345 |
+
},
|
346 |
+
}
|
347 |
+
|
348 |
+
# Create the collection with the schema
|
349 |
+
# db.create_collection("courses_collection2", validator={"$jsonSchema": course_schema})
|
350 |
+
|
351 |
+
# sample_course = {
|
352 |
+
# "course_id": "CS101",
|
353 |
+
# "title": "Introduction to Computer Science",
|
354 |
+
# "description": "This course covers the basics of computer science and programming.",
|
355 |
+
# "faculty": "Dr. John Doe",
|
356 |
+
# "faculty_id": "F101",
|
357 |
+
# "duration": "10 weeks",
|
358 |
+
# "created_at": datetime.utcnow(),
|
359 |
+
# "sessions": [
|
360 |
+
# {
|
361 |
+
# "session_id": "S101",
|
362 |
+
# "title": "Introduction to Programming Fundamentals",
|
363 |
+
# "date": datetime.utcnow() - timedelta(days=7),
|
364 |
+
# "status": "completed",
|
365 |
+
# "created_at": datetime.utcnow() - timedelta(days=7),
|
366 |
+
# "pre_class": {
|
367 |
+
# "resources": [
|
368 |
+
# {
|
369 |
+
# "type": "pdf",
|
370 |
+
# "title": "Introduction to Python Basics",
|
371 |
+
# "url": "/assets/python_basics.pdf",
|
372 |
+
# "vector": [0.1, 0.2, 0.3] # Example vector
|
373 |
+
# }
|
374 |
+
# ],
|
375 |
+
# "completion_required": True
|
376 |
+
# },
|
377 |
+
# "in_class": {
|
378 |
+
# "topics": ["Variables", "Data Types", "Basic Operations"],
|
379 |
+
# "quiz": {
|
380 |
+
# "title": "Python Basics Quiz",
|
381 |
+
# "questions": 5,
|
382 |
+
# "duration": 15
|
383 |
+
# },
|
384 |
+
# "polls": [
|
385 |
+
# {
|
386 |
+
# "question": "How comfortable are you with Python syntax?",
|
387 |
+
# "options": ["Very", "Somewhat", "Not at all"],
|
388 |
+
# "responses": {"Very": 10, "Somewhat": 5, "Not at all": 2}
|
389 |
+
# }
|
390 |
+
# ]
|
391 |
+
# },
|
392 |
+
# "post_class": {
|
393 |
+
# "assignments": [
|
394 |
+
# {
|
395 |
+
# "id": 1,
|
396 |
+
# "title": "Basic Python Programs",
|
397 |
+
# "due_date": datetime.utcnow() + timedelta(days=2),
|
398 |
+
# "status": "pending",
|
399 |
+
# "submissions": []
|
400 |
+
# }
|
401 |
+
# ]
|
402 |
+
# }
|
403 |
+
# },
|
404 |
+
# {
|
405 |
+
# "session_id": "S102",
|
406 |
+
# "title": "Control Flow and Functions",
|
407 |
+
# "date": datetime.utcnow() - timedelta(days=3),
|
408 |
+
# "status": "completed",
|
409 |
+
# "created_at": datetime.utcnow() - timedelta(days=3),
|
410 |
+
# "pre_class": {
|
411 |
+
# "resources": [
|
412 |
+
# {
|
413 |
+
# "type": "pdf",
|
414 |
+
# "title": "Control Flow in Python",
|
415 |
+
# "url": "/assets/control_flow.pdf",
|
416 |
+
# "vector": [0.4, 0.5, 0.6] # Example vector
|
417 |
+
# }
|
418 |
+
# ],
|
419 |
+
# "completion_required": True
|
420 |
+
# },
|
421 |
+
# "in_class": {
|
422 |
+
# "topics": ["If-else statements", "Loops", "Function definitions"],
|
423 |
+
# "quiz": {
|
424 |
+
# "title": "Control Flow Quiz",
|
425 |
+
# "questions": 8,
|
426 |
+
# "duration": 20
|
427 |
+
# },
|
428 |
+
# "polls": [
|
429 |
+
# {
|
430 |
+
# "question": "Which loop type do you find more intuitive?",
|
431 |
+
# "options": ["For loops", "While loops", "Both"],
|
432 |
+
# "responses": {"For loops": 12, "While loops": 8, "Both": 10}
|
433 |
+
# }
|
434 |
+
# ]
|
435 |
+
# },
|
436 |
+
# "post_class": {
|
437 |
+
# "assignments": [
|
438 |
+
# {
|
439 |
+
# "id": 2,
|
440 |
+
# "title": "Function Implementation Exercise",
|
441 |
+
# "due_date": datetime.utcnow() + timedelta(days=4),
|
442 |
+
# "status": "pending",
|
443 |
+
# "submissions": []
|
444 |
+
# }
|
445 |
+
# ]
|
446 |
+
# }
|
447 |
+
# }
|
448 |
+
# ]
|
449 |
+
# }
|
450 |
+
courses_collection2 = db["courses_collection2"]
|
451 |
+
|
452 |
+
|
453 |
+
# Define the users schema
|
454 |
+
users_schema = {
|
455 |
+
"bsonType": "object",
|
456 |
+
"required": ["user_id", "username", "password", "role", "created_at"],
|
457 |
+
"properties": {
|
458 |
+
"user_id": {
|
459 |
+
"bsonType": "string",
|
460 |
+
"description": "Unique identifier for the user",
|
461 |
+
},
|
462 |
+
"username": {"bsonType": "string", "description": "Name of the User"},
|
463 |
+
"password": {"bsonType": "string", "description": "Password of the user"},
|
464 |
+
"role": {
|
465 |
+
"bsonType": "string",
|
466 |
+
"description": "Type of user (e.g., student, faculty)",
|
467 |
+
},
|
468 |
+
"created_at": {
|
469 |
+
"bsonType": "date",
|
470 |
+
"description": "Date when the user was created",
|
471 |
+
},
|
472 |
+
},
|
473 |
+
}
|
474 |
+
# Create the collection with the schema
|
475 |
+
# db.create_collection("users", validator={"$jsonSchema": users_schema})
|
476 |
+
users_collection = db["users"]
|
477 |
+
|
478 |
+
|
479 |
+
# Defining the Student Collection
|
480 |
+
student_schema = {
|
481 |
+
"bsonType": "object",
|
482 |
+
"required": ["SID", "full_name", "password", "enrolled_courses", "created_at"],
|
483 |
+
"properties": {
|
484 |
+
"SID": {
|
485 |
+
"bsonType": "string",
|
486 |
+
"description": "Unique identifier for the student",
|
487 |
+
},
|
488 |
+
"full_name": {"bsonType": "string", "description": "Full name of the student"},
|
489 |
+
"password": {
|
490 |
+
"bsonType": "string",
|
491 |
+
"description": "Hashed password of the student",
|
492 |
+
},
|
493 |
+
"enrolled_courses": {
|
494 |
+
"bsonType": "array",
|
495 |
+
"description": "List of courses the student is enrolled in",
|
496 |
+
"items": {
|
497 |
+
"bsonType": "object",
|
498 |
+
"required": ["course_id", "title"],
|
499 |
+
"properties": {
|
500 |
+
"course_id": {
|
501 |
+
"bsonType": "string",
|
502 |
+
"description": "Unique identifier for the course",
|
503 |
+
},
|
504 |
+
"title": {
|
505 |
+
"bsonType": "string",
|
506 |
+
"description": "Title of the course",
|
507 |
+
},
|
508 |
+
},
|
509 |
+
},
|
510 |
+
},
|
511 |
+
"created_at": {
|
512 |
+
"bsonType": "date",
|
513 |
+
"description": "Date when the student was created",
|
514 |
+
},
|
515 |
+
},
|
516 |
+
}
|
517 |
+
# Defining the Faculty Collection
|
518 |
+
faculty_schema = {
|
519 |
+
"bsonType": "object",
|
520 |
+
"required": ["TID", "full_name", "password", "courses_taught", "created_at"],
|
521 |
+
"properties": {
|
522 |
+
"TID": {
|
523 |
+
"bsonType": "string",
|
524 |
+
"description": "Unique identifier for the faculty",
|
525 |
+
},
|
526 |
+
"full_name": {"bsonType": "string", "description": "Full name of the faculty"},
|
527 |
+
"password": {
|
528 |
+
"bsonType": "string",
|
529 |
+
"description": "Hashed password of the faculty",
|
530 |
+
},
|
531 |
+
"courses_taught": {
|
532 |
+
"bsonType": "array",
|
533 |
+
"description": "List of courses the faculty is teaching",
|
534 |
+
"items": {
|
535 |
+
"bsonType": "object",
|
536 |
+
"required": ["course_id", "title"],
|
537 |
+
"properties": {
|
538 |
+
"course_id": {
|
539 |
+
"bsonType": "string",
|
540 |
+
"description": "Unique identifier for the course",
|
541 |
+
},
|
542 |
+
"title": {
|
543 |
+
"bsonType": "string",
|
544 |
+
"description": "Title of the course",
|
545 |
+
},
|
546 |
+
},
|
547 |
+
},
|
548 |
+
},
|
549 |
+
"created_at": {
|
550 |
+
"bsonType": "date",
|
551 |
+
"description": "Date when the faculty was created",
|
552 |
+
},
|
553 |
+
},
|
554 |
+
}
|
555 |
+
# Creating the Collections
|
556 |
+
# db.create_collection("students", validator={"$jsonSchema": student_schema})
|
557 |
+
# db.create_collection("faculty", validator={"$jsonSchema": faculty_schema})
|
558 |
+
|
559 |
+
students_collection = db["students"]
|
560 |
+
faculty_collection = db["faculty"]
|
561 |
+
|
562 |
+
# Defining the Vector Collection Schema
|
563 |
+
vector_schema = {
|
564 |
+
"bsonType": "object",
|
565 |
+
"required": ["resource_id", "vector"],
|
566 |
+
"properties": {
|
567 |
+
"resource_id": {
|
568 |
+
"bsonType": "objectId",
|
569 |
+
"description": "Unique identifier for the resource",
|
570 |
+
},
|
571 |
+
"vector": {
|
572 |
+
"bsonType": "array",
|
573 |
+
"description": "Vector representation of the resource",
|
574 |
+
"items": {"bsonType": "double"},
|
575 |
+
},
|
576 |
+
"text": {"bsonType": "string", "description": "Text content of the resource"},
|
577 |
+
"created_at": {
|
578 |
+
"bsonType": "date",
|
579 |
+
"description": "Date when the vector was created",
|
580 |
+
},
|
581 |
+
},
|
582 |
+
}
|
583 |
+
# Creating the Vector Collection
|
584 |
+
# db.create_collection("vectors", validator={"$jsonSchema": vector_schema})
|
585 |
+
vectors_collection = db["vectors"]
|
586 |
+
|
587 |
+
|
588 |
+
# Creating a Chat-History Collection
|
589 |
+
# Creating a Chat-History Collection
|
590 |
+
chat_history_schema = {
|
591 |
+
"bsonType": "object",
|
592 |
+
"required": ["user_id", "session_id", "messages", "timestamp"],
|
593 |
+
"properties": {
|
594 |
+
"user_id": {
|
595 |
+
"bsonType": "objectId",
|
596 |
+
"description": "Unique identifier for the user",
|
597 |
+
},
|
598 |
+
"session_id": {
|
599 |
+
"bsonType": "string",
|
600 |
+
"description": "Identifier for the session",
|
601 |
+
},
|
602 |
+
"timestamp": {
|
603 |
+
"bsonType": "date",
|
604 |
+
"description": "Timestamp when the chat session started",
|
605 |
+
},
|
606 |
+
"messages": {
|
607 |
+
"bsonType": "array",
|
608 |
+
"description": "List of chat messages",
|
609 |
+
"items": {
|
610 |
+
"bsonType": "object",
|
611 |
+
"properties": {
|
612 |
+
"prompt": {
|
613 |
+
"bsonType": "string",
|
614 |
+
"description": "User's question or prompt",
|
615 |
+
},
|
616 |
+
"response": {
|
617 |
+
"bsonType": "string",
|
618 |
+
"description": "Assistant's response",
|
619 |
+
},
|
620 |
+
"timestamp": {
|
621 |
+
"bsonType": "date",
|
622 |
+
"description": "Timestamp of the message",
|
623 |
+
},
|
624 |
+
},
|
625 |
+
},
|
626 |
+
},
|
627 |
+
},
|
628 |
+
}
|
629 |
+
|
630 |
+
# Create the collection with the schema
|
631 |
+
# db.create_collection("chat_history", validator={"$jsonSchema": chat_history_schema})
|
632 |
+
chat_history_collection = db["chat_history"]
|
633 |
+
|
634 |
+
|
635 |
+
# Database setup for Research Assistant
|
636 |
+
# Research Assistant Schema
|
637 |
+
research_assistant_schema = {
|
638 |
+
"bsonType": "object",
|
639 |
+
"required": ["full_name", "password", "email", "courses_assisted"],
|
640 |
+
"properties": {
|
641 |
+
"full_name": {
|
642 |
+
"bsonType": "string",
|
643 |
+
"description": "Full name of the research assistant",
|
644 |
+
},
|
645 |
+
"password": {
|
646 |
+
"bsonType": "string",
|
647 |
+
"description": "Hashed password of the research assistant",
|
648 |
+
},
|
649 |
+
"email": {
|
650 |
+
"bsonType": "string",
|
651 |
+
"description": "Email address of the research assistant",
|
652 |
+
},
|
653 |
+
"courses_assisted": {
|
654 |
+
"bsonType": "array",
|
655 |
+
"description": "List of courses the research assistant is assisting",
|
656 |
+
"items": {
|
657 |
+
"bsonType": "object",
|
658 |
+
"required": ["course_id"],
|
659 |
+
"properties": {
|
660 |
+
"course_id": {
|
661 |
+
"bsonType": "string",
|
662 |
+
"description": "ID of the course",
|
663 |
+
}
|
664 |
+
},
|
665 |
+
},
|
666 |
+
},
|
667 |
+
},
|
668 |
+
}
|
669 |
+
|
670 |
+
# Create research assistants collection
|
671 |
+
research_assistants_collection = db["research_assistants"]
|
672 |
+
|
673 |
+
# Create indexes
|
674 |
+
research_assistants_collection.create_index("full_name", unique=True)
|
675 |
+
research_assistants_collection.create_index("email", unique=True)
|
676 |
+
|
677 |
+
|
678 |
+
# Optional: Sample data insertion function
|
679 |
+
# def insert_sample_research_assistants():
|
680 |
+
# sample_research_assistants = [
|
681 |
+
# {
|
682 |
+
# "full_name": "John Doe RA",
|
683 |
+
# "password": generate_password_hash("password123"),
|
684 |
+
# "email": "[email protected]",
|
685 |
+
# "courses_assisted": [{"course_id": "CS101"}, {"course_id": "CS102"}],
|
686 |
+
# }
|
687 |
+
# ]
|
688 |
+
|
689 |
+
# try:
|
690 |
+
# research_assistants_collection.insert_many(sample_research_assistants)
|
691 |
+
# print("Sample research assistants inserted successfully!")
|
692 |
+
# except Exception as e:
|
693 |
+
# print(f"Error inserting sample research assistants: {e}")
|
694 |
+
|
695 |
+
# if __name__ == "__main__":
|
696 |
+
# insert_sample_analysts()
|
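A minimal sketch of how the commented-out create_collection calls above might be applied once the collections already exist: the same $jsonSchema validators can be attached with MongoDB's collMod command (shown here for users_schema; the other schemas follow the same pattern).

    # Hedged sketch: attach an existing schema as a validator via collMod.
    db.command({
        "collMod": "users",
        "validator": {"$jsonSchema": users_schema},
        "validationLevel": "moderate",  # only validate inserts and updates to already-valid documents
    })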
entire_download.py
ADDED
@@ -0,0 +1,90 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from pymongo import MongoClient
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
import os
|
6 |
+
|
7 |
+
# 1. Load environment variables
|
8 |
+
load_dotenv()
|
9 |
+
MONGODB_URI = os.getenv(
|
10 |
+
"MONGODB_UR",
|
11 |
+
"mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
|
12 |
+
)
|
13 |
+
|
14 |
+
# 2. Create MongoDB connection
|
15 |
+
client = MongoClient(MONGODB_URI)
|
16 |
+
db = client["novascholar_db"]
|
17 |
+
collection = db["research_papers"]
|
18 |
+
|
19 |
+
|
20 |
+
def get_collection_data(paper_type: str):
|
21 |
+
"""
|
22 |
+
Fetch all documents from the specified collection based on paper type.
|
23 |
+
"""
|
24 |
+
try:
|
25 |
+
# Determine collection name based on paper type
|
26 |
+
collection_name = paper_type.replace(" ", "_").lower()
|
27 |
+
doc_collection = db[collection_name]
|
28 |
+
|
29 |
+
# Get all documents
|
30 |
+
docs = list(doc_collection.find())
|
31 |
+
|
32 |
+
# Convert ObjectId to string
|
33 |
+
for doc in docs:
|
34 |
+
doc["_id"] = str(doc["_id"])
|
35 |
+
|
36 |
+
return docs
|
37 |
+
except Exception as e:
|
38 |
+
st.error(f"Database Error: {str(e)}")
|
39 |
+
return None
|
40 |
+
|
41 |
+
|
42 |
+
def main():
|
43 |
+
st.title("MongoDB Collection Download")
|
44 |
+
st.write("Download all documents from the selected research paper collection")
|
45 |
+
|
46 |
+
# Dropdown to select the type of research paper
|
47 |
+
paper_type = st.selectbox(
|
48 |
+
"Select type of research paper:",
|
49 |
+
[
|
50 |
+
"Review Based Paper",
|
51 |
+
"Opinion/Perspective Based Paper",
|
52 |
+
"Empirical Research Paper",
|
53 |
+
"Research Paper (Other)",
|
54 |
+
],
|
55 |
+
)
|
56 |
+
|
57 |
+
if st.button("Fetch Data"):
|
58 |
+
with st.spinner("Retrieving documents from MongoDB..."):
|
59 |
+
docs = get_collection_data(paper_type)
|
60 |
+
|
61 |
+
if docs:
|
62 |
+
# Convert to DataFrame
|
63 |
+
df = pd.DataFrame(docs)
|
64 |
+
# Convert lists to comma-separated strings for consistency
|
65 |
+
for col in df.columns:
|
66 |
+
if df[col].apply(lambda x: isinstance(x, list)).any():
|
67 |
+
df[col] = df[col].apply(
|
68 |
+
lambda x: (
|
69 |
+
", ".join(map(str, x)) if isinstance(x, list) else x
|
70 |
+
)
|
71 |
+
)
|
72 |
+
st.success(
|
73 |
+
f"Successfully retrieved {len(df)} documents from '{paper_type}' collection."
|
74 |
+
)
|
75 |
+
st.dataframe(df)
|
76 |
+
|
77 |
+
# Provide option to download the data as CSV
|
78 |
+
csv = df.to_csv(index=False).encode("utf-8")
|
79 |
+
st.download_button(
|
80 |
+
label="Download CSV",
|
81 |
+
data=csv,
|
82 |
+
file_name=f"{paper_type.replace(' ', '_').lower()}_papers.csv",
|
83 |
+
mime="text/csv",
|
84 |
+
)
|
85 |
+
else:
|
86 |
+
st.warning(f"No documents found in the '{paper_type}' collection.")
|
87 |
+
|
88 |
+
|
89 |
+
if __name__ == "__main__":
|
90 |
+
main()
|
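For reference, the collection lookup in get_collection_data simply normalises the dropdown label, so the MongoDB collections are expected to be named accordingly:

    >>> "Review Based Paper".replace(" ", "_").lower()
    'review_based_paper'
    >>> "Research Paper (Other)".replace(" ", "_").lower()
    'research_paper_(other)'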
extract.py
ADDED
@@ -0,0 +1,140 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import PyPDF2
|
4 |
+
import io
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import requests
|
8 |
+
import time
|
9 |
+
|
10 |
+
# Load environment variables
|
11 |
+
load_dotenv()
|
12 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
13 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
14 |
+
|
15 |
+
def call_perplexity_api(prompt: str) -> str:
|
16 |
+
"""Call Perplexity AI with a prompt, return the text response if successful."""
|
17 |
+
headers = {
|
18 |
+
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
19 |
+
"Content-Type": "application/json",
|
20 |
+
}
|
21 |
+
|
22 |
+
payload = {
|
23 |
+
"model": "llama-3.1-sonar-small-128k-chat",
|
24 |
+
"messages": [{"role": "user", "content": prompt}],
|
25 |
+
"temperature": 0.3,
|
26 |
+
}
|
27 |
+
|
28 |
+
try:
|
29 |
+
response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
|
30 |
+
response.raise_for_status()
|
31 |
+
return response.json()["choices"][0]["message"]["content"]
|
32 |
+
except Exception as e:
|
33 |
+
st.error(f"API Error: {str(e)}")
|
34 |
+
return ""
|
35 |
+
|
36 |
+
def extract_text_from_pdf(pdf_file):
|
37 |
+
"""Extract text content from a PDF file."""
|
38 |
+
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
39 |
+
text = ""
|
40 |
+
for page in pdf_reader.pages:
|
41 |
+
text += page.extract_text() + "\n"
|
42 |
+
return text
|
43 |
+
|
44 |
+
def analyze_paper(text: str, category: str) -> str:
|
45 |
+
"""Generate a prompt and get analysis for a specific category."""
|
46 |
+
prompts = {
|
47 |
+
"Summarized Abstract": "Extract and summarize the abstract from this research paper:",
|
48 |
+
"Results": "What are the main results and findings from this research paper:",
|
49 |
+
"Summarized Introduction": "Summarize the introduction section of this research paper:",
|
50 |
+
"Methods Used": "What are the main methods and methodologies used in this research:",
|
51 |
+
"Literature Survey": "Summarize the literature review or related work from this paper:",
|
52 |
+
"Limitations": "What are the limitations mentioned in this research:",
|
53 |
+
"Contributions": "What are the main contributions of this research:",
|
54 |
+
"Practical Implications": "What are the practical implications of this research:",
|
55 |
+
"Objectives": "What are the main objectives of this research:",
|
56 |
+
"Findings": "What are the key findings from this research:",
|
57 |
+
"Future Research": "What future research directions are suggested in this paper:",
|
58 |
+
"Dependent Variables": "What are the dependent variables studied in this research:",
|
59 |
+
"Independent Variables": "What are the independent variables studied in this research:",
|
60 |
+
"Dataset": "What dataset(s) were used in this research:",
|
61 |
+
"Problem Statement": "What is the main problem statement or research question:",
|
62 |
+
"Challenges": "What challenges were faced or addressed in this research:",
|
63 |
+
"Applications": "What are the potential applications of this research:"
|
64 |
+
}
|
65 |
+
|
66 |
+
prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}" # Limit text to avoid token limits
|
67 |
+
return call_perplexity_api(prompt)
|
68 |
+
|
69 |
+
def main():
|
70 |
+
st.title("Research Paper Analysis Tool")
|
71 |
+
|
72 |
+
# File uploader
|
73 |
+
uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)
|
74 |
+
|
75 |
+
if uploaded_files:
|
76 |
+
if st.button("Process Papers"):
|
77 |
+
# Initialize progress bar
|
78 |
+
progress_bar = st.progress(0)
|
79 |
+
status_text = st.empty()
|
80 |
+
|
81 |
+
# Initialize results dictionary
|
82 |
+
results = []
|
83 |
+
|
84 |
+
# Define categories
|
85 |
+
categories = [
|
86 |
+
"Summarized Abstract", "Results", "Summarized Introduction",
|
87 |
+
"Methods Used", "Literature Survey", "Limitations",
|
88 |
+
"Contributions", "Practical Implications", "Objectives",
|
89 |
+
"Findings", "Future Research", "Dependent Variables",
|
90 |
+
"Independent Variables", "Dataset", "Problem Statement",
|
91 |
+
"Challenges", "Applications"
|
92 |
+
]
|
93 |
+
|
94 |
+
# Process each file
|
95 |
+
for i, file in enumerate(uploaded_files):
|
96 |
+
status_text.text(f"Processing {file.name}...")
|
97 |
+
|
98 |
+
# Extract text from PDF
|
99 |
+
text = extract_text_from_pdf(file)
|
100 |
+
|
101 |
+
# Initialize paper results
|
102 |
+
paper_results = {"Filename": file.name}
|
103 |
+
|
104 |
+
# Analyze each category
|
105 |
+
for j, category in enumerate(categories):
|
106 |
+
status_text.text(f"Processing {file.name} - {category}")
|
107 |
+
paper_results[category] = analyze_paper(text, category)
|
108 |
+
|
109 |
+
# Update progress
|
110 |
+
progress = (i * len(categories) + j + 1) / (len(uploaded_files) * len(categories))
|
111 |
+
progress_bar.progress(progress)
|
112 |
+
|
113 |
+
# Add small delay to avoid API rate limits
|
114 |
+
time.sleep(1)
|
115 |
+
|
116 |
+
results.append(paper_results)
|
117 |
+
|
118 |
+
# Create DataFrame
|
119 |
+
df = pd.DataFrame(results)
|
120 |
+
|
121 |
+
# Convert DataFrame to CSV
|
122 |
+
csv = df.to_csv(index=False)
|
123 |
+
|
124 |
+
# Create download button
|
125 |
+
st.download_button(
|
126 |
+
label="Download Results as CSV",
|
127 |
+
data=csv,
|
128 |
+
file_name="research_papers_analysis.csv",
|
129 |
+
mime="text/csv"
|
130 |
+
)
|
131 |
+
|
132 |
+
# Display results in the app
|
133 |
+
st.subheader("Analysis Results")
|
134 |
+
st.dataframe(df)
|
135 |
+
|
136 |
+
status_text.text("Processing complete!")
|
137 |
+
progress_bar.progress(1.0)
|
138 |
+
|
139 |
+
if __name__ == "__main__":
|
140 |
+
main()
|
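A minimal usage sketch of the two helpers above outside the Streamlit UI, assuming PERPLEXITY_API_KEY is set in the environment; "paper.pdf" is a placeholder path.

    # Hedged sketch: analyse one local PDF for a single category.
    with open("paper.pdf", "rb") as f:  # placeholder file name
        text = extract_text_from_pdf(f)
    abstract = analyze_paper(text, "Summarized Abstract")
    print(abstract)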
file_upload_vectorize.py
ADDED
@@ -0,0 +1,179 @@
1 |
+
from pymongo import MongoClient
|
2 |
+
from datetime import datetime
|
3 |
+
import openai
|
4 |
+
import google.generativeai as genai
|
5 |
+
import streamlit as st
|
6 |
+
from db import courses_collection2, faculty_collection, students_collection, vectors_collection
|
7 |
+
from PIL import Image
|
8 |
+
import PyPDF2, docx, io
|
9 |
+
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
|
10 |
+
from bson import ObjectId
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
import os
|
13 |
+
from create_course import courses_collection
|
14 |
+
|
15 |
+
load_dotenv()
|
16 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
17 |
+
OPENAI_KEY = os.getenv('OPENAI_KEY')
|
18 |
+
GEMINI_KEY = os.getenv('GEMINI_KEY')
|
19 |
+
|
20 |
+
|
21 |
+
client = MongoClient(MONGO_URI)
|
22 |
+
db = client['novascholar_db']
|
23 |
+
resources_collection = db['resources']
|
24 |
+
|
25 |
+
# Configure APIs
|
26 |
+
openai.api_key = OPENAI_KEY
|
27 |
+
genai.configure(api_key=GEMINI_KEY)
|
28 |
+
model = genai.GenerativeModel('gemini-pro')
|
29 |
+
|
30 |
+
def upload_resource(course_id, session_id, file_name, file_content, material_type):
|
31 |
+
# material_data = {
|
32 |
+
# "session_id": session_id,
|
33 |
+
# "course_id": course_id,
|
34 |
+
# "file_name": file_name,
|
35 |
+
# "file_content": file_content,
|
36 |
+
# "material_type": material_type,
|
37 |
+
# "uploaded_at": datetime.utcnow()
|
38 |
+
# }
|
39 |
+
# return resources_collection.insert_one(material_data)
|
40 |
+
# resource_id = ObjectId()
|
41 |
+
|
42 |
+
# Extract text content from the file
|
43 |
+
text_content = extract_text_from_file(file_content)
|
44 |
+
|
45 |
+
# Check if a resource with this file name already exists
|
46 |
+
existing_resource = resources_collection.find_one({
|
47 |
+
"session_id": session_id,
|
48 |
+
"file_name": file_name
|
49 |
+
})
|
50 |
+
|
51 |
+
if existing_resource:
|
52 |
+
return existing_resource["_id"]
|
53 |
+
|
54 |
+
# Read the file content
|
55 |
+
file_content.seek(0) # Reset the file pointer to the beginning
|
56 |
+
original_file_content = file_content.read()
|
57 |
+
|
58 |
+
|
59 |
+
resource_data = {
|
60 |
+
"_id": ObjectId(),
|
61 |
+
"course_id": course_id,
|
62 |
+
"session_id": session_id,
|
63 |
+
"file_name": file_name,
|
64 |
+
"file_type": file_content.type,
|
65 |
+
"text_content": text_content,
|
66 |
+
"file_content": original_file_content, # Store the original file content
|
67 |
+
"material_type": material_type,
|
68 |
+
"uploaded_at": datetime.utcnow()
|
69 |
+
}
|
70 |
+
|
71 |
+
resources_collection.insert_one(resource_data)
|
72 |
+
resource_id = resource_data["_id"]
|
73 |
+
|
74 |
+
courses_collection.update_one(
|
75 |
+
{
|
76 |
+
"course_id": course_id,
|
77 |
+
"sessions.session_id": session_id
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"$push": {"sessions.$.pre_class.resources": resource_id}
|
81 |
+
}
|
82 |
+
)
|
83 |
+
# print("End of Upload Resource, Resource ID is: ", resource_id)
|
84 |
+
# return resource_id
|
85 |
+
if text_content:
|
86 |
+
create_vector_store(text_content, resource_id)
|
87 |
+
return resource_id
|
88 |
+
|
89 |
+
def assignment_submit(student_id, course_id, session_id, assignment_id, file_name, file_content, text_content, material_type):
|
90 |
+
# Read the file content
|
91 |
+
file_content.seek(0) # Reset the file pointer to the beginning
|
92 |
+
original_file_content = file_content.read()
|
93 |
+
|
94 |
+
assignment_data = {
|
95 |
+
"student_id": student_id,
|
96 |
+
"course_id": course_id,
|
97 |
+
"session_id": session_id,
|
98 |
+
"assignment_id": assignment_id,
|
99 |
+
"file_name": file_name,
|
100 |
+
"file_type": file_content.type,
|
101 |
+
"file_content": original_file_content, # Store the original file content
|
102 |
+
"text_content": text_content,
|
103 |
+
"material_type": material_type,
|
104 |
+
"submitted_at": datetime.utcnow(),
|
105 |
+
"file_url": "sample_url"
|
106 |
+
}
|
107 |
+
try:
|
108 |
+
courses_collection2.update_one(
|
109 |
+
{
|
110 |
+
"course_id": course_id,
|
111 |
+
"sessions.session_id": session_id,
|
112 |
+
"sessions.post_class.assignments.id": assignment_id
|
113 |
+
},
|
114 |
+
{
|
115 |
+
"$push": {"sessions.$.post_class.assignments.$[assignment].submissions": assignment_data}
|
116 |
+
},
|
117 |
+
array_filters=[{"assignment.id": assignment_id}]
|
118 |
+
)
|
119 |
+
return True
|
120 |
+
except Exception as db_error:
|
121 |
+
print(f"Error saving submission: {str(db_error)}")
|
122 |
+
return False
|
123 |
+
|
124 |
+
def extract_text_from_file(uploaded_file):
|
125 |
+
text = ""
|
126 |
+
file_type = uploaded_file.type
|
127 |
+
|
128 |
+
try:
|
129 |
+
if file_type == "text/plain":
|
130 |
+
text = uploaded_file.getvalue().decode("utf-8")
|
131 |
+
elif file_type == "application/pdf":
|
132 |
+
pdf_reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file.getvalue()))
|
133 |
+
for page in pdf_reader.pages:
|
134 |
+
text += page.extract_text() + "\n"
|
135 |
+
elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
|
136 |
+
doc = docx.Document(io.BytesIO(uploaded_file.getvalue()))
|
137 |
+
for para in doc.paragraphs:
|
138 |
+
text += para.text + "\n"
|
139 |
+
return text
|
140 |
+
except Exception as e:
|
141 |
+
st.error(f"Error processing file: {str(e)}")
|
142 |
+
return None
|
143 |
+
|
144 |
+
def get_embedding(text):
|
145 |
+
response = openai.embeddings.create(
|
146 |
+
model="text-embedding-ada-002",
|
147 |
+
input=text
|
148 |
+
)
|
149 |
+
return response.data[0].embedding
|
150 |
+
|
151 |
+
def create_vector_store(text, resource_id):
|
152 |
+
# resource_object_id = ObjectId(resource_id)
|
153 |
+
# Ensure resource_id is an ObjectId
|
154 |
+
# if not isinstance(resource_id, ObjectId):
|
155 |
+
# resource_id = ObjectId(resource_id)
|
156 |
+
|
157 |
+
existing_vector = vectors_collection.find_one({
|
158 |
+
"resource_id": resource_id,
|
159 |
+
"text": text
|
160 |
+
})
|
161 |
+
|
162 |
+
if existing_vector:
|
163 |
+
print(f"Vector already exists for Resource ID: {resource_id}")
|
164 |
+
return
|
165 |
+
|
166 |
+
print(f"In Vector Store method, Resource ID is: {resource_id}")
|
167 |
+
document = Document(text=text)
|
168 |
+
embedding = get_embedding(text)
|
169 |
+
|
170 |
+
vector_data = {
|
171 |
+
"resource_id": resource_id,
|
172 |
+
"vector": embedding,
|
173 |
+
"text": text,
|
174 |
+
"created_at": datetime.utcnow()
|
175 |
+
}
|
176 |
+
|
177 |
+
vectors_collection.insert_one(vector_data)
|
178 |
+
|
179 |
+
# return VectorStoreIndex.from_documents([document])
|
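A minimal sketch of wiring upload_resource to a Streamlit uploader; course_id, session_id and the "pre-class" material type are placeholders standing in for values supplied by the surrounding session page.

    # Hedged sketch: course_id / session_id come from the calling page in the real app.
    uploaded = st.file_uploader("Upload pre-class material", type=["txt", "pdf", "docx"])
    if uploaded is not None:
        resource_id = upload_resource(course_id, session_id, uploaded.name, uploaded, "pre-class")
        st.success(f"Stored resource {resource_id}")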
gen_mcqs.py
ADDED
@@ -0,0 +1,206 @@
1 |
+
import ast
|
2 |
+
from pymongo import MongoClient
|
3 |
+
from datetime import datetime
|
4 |
+
import openai
|
5 |
+
import google.generativeai as genai
|
6 |
+
from google.generativeai import GenerativeModel
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
import os
|
9 |
+
from file_upload_vectorize import resources_collection, vectors_collection, courses_collection2, faculty_collection
|
10 |
+
|
11 |
+
# Load environment variables
|
12 |
+
load_dotenv()
|
13 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
14 |
+
OPENAI_KEY = os.getenv('OPENAI_KEY')
|
15 |
+
GEMINI_KEY = os.getenv('GEMINI_KEY')
|
16 |
+
|
17 |
+
# Configure APIs
|
18 |
+
openai.api_key = OPENAI_KEY
|
19 |
+
genai.configure(api_key=GEMINI_KEY)
|
20 |
+
model = genai.GenerativeModel('gemini-pro')
|
21 |
+
|
22 |
+
# Connect to MongoDB
|
23 |
+
client = MongoClient(MONGO_URI)
|
24 |
+
db = client['novascholar_db']
|
25 |
+
quizzes_collection = db["quizzes"]
|
26 |
+
|
27 |
+
def strip_code_markers(response_text):
|
28 |
+
"""Strip off the markers ``` and python from a LLM model's response"""
|
29 |
+
if response_text.startswith("```python"):
|
30 |
+
response_text = response_text[len("```python"):].strip()
|
31 |
+
if response_text.startswith("```"):
|
32 |
+
response_text = response_text[len("```"):].strip()
|
33 |
+
if response_text.endswith("```"):
|
34 |
+
response_text = response_text[:-len("```")].strip()
|
35 |
+
return response_text
|
36 |
+
|
37 |
+
|
38 |
+
# New function to generate MCQs using Gemini
|
39 |
+
def generate_mcqs(context, num_questions, session_title, session_description):
|
40 |
+
"""Generate MCQs either from context or session details"""
|
41 |
+
try:
|
42 |
+
# Initialize Gemini model
|
43 |
+
if context:
|
44 |
+
prompt = f"""
|
45 |
+
Based on the following content, generate {num_questions} multiple choice questions.
|
46 |
+
Format each question as a Python dictionary with the following structure:
|
47 |
+
{{
|
48 |
+
"question": "Question text here",
|
49 |
+
"options": ["A) option1", "B) option2", "C) option3", "D) option4"],
|
50 |
+
"correct_option": "A) option1" or "B) option2" or "C) option3" or "D) option4"
|
51 |
+
}}
|
52 |
+
|
53 |
+
Content:
|
54 |
+
{context}
|
55 |
+
|
56 |
+
Generate challenging but clear questions that test understanding of key concepts.
|
57 |
+
Return only the Python list of dictionaries.
|
58 |
+
"""
|
59 |
+
else:
|
60 |
+
prompt = f"""
|
61 |
+
Generate {num_questions} multiple choice questions about the topic:
|
62 |
+
Title: {session_title}
|
63 |
+
Description: {session_description}
|
64 |
+
|
65 |
+
Format each question as a Python dictionary with the following structure:
|
66 |
+
{{
|
67 |
+
"question": "Question text here",
|
68 |
+
"options": ["A) option1", "B) option2", "C) option3", "D) option4"],
|
69 |
+
"correct_option": "A" or "B" or "C" or "D"
|
70 |
+
}}
|
71 |
+
|
72 |
+
Generate challenging but clear questions.
|
73 |
+
Return only the Python list of dictionaries without any additional formatting or markers
|
74 |
+
Do not write any other text, do not start the response with (```python), do not end the response with backticks(```)
|
75 |
+
A Sample response should look like this: Response Text: [
|
76 |
+
        {{
|
77 |
+
"question": "Which of the following is NOT a valid data type in C++?",
|
78 |
+
"options": ["int", "double", "boolean", "char"],
|
79 |
+
"correct_option": "C"
|
80 |
+
        }}
|
81 |
+
] (Notice that there are no backticks(```) around the response and no (```python))
|
82 |
+
.
|
83 |
+
"""
|
84 |
+
|
85 |
+
response = model.generate_content(prompt)
|
86 |
+
response_text = response.text.strip()
|
87 |
+
print("Response Text:", response_text)
|
88 |
+
modified_response_text = strip_code_markers(response_text)
|
89 |
+
print("Response Text Modified to:", modified_response_text)
|
90 |
+
# Extract and parse the response to get the list of MCQs
|
91 |
+
        mcqs = ast.literal_eval(modified_response_text)  # literal_eval safely parses the list of dicts without executing code
|
92 |
+
print(mcqs)
|
93 |
+
if not mcqs:
|
94 |
+
raise ValueError("No questions generated")
|
95 |
+
return mcqs
|
96 |
+
except Exception as e:
|
97 |
+
print(f"Error generating MCQs: , error: {e}")
|
98 |
+
return None
|
99 |
+
|
100 |
+
# New function to save quiz to database
|
101 |
+
def save_quiz(course_id, session_id, title, questions, user_id):
|
102 |
+
"""Save quiz to database"""
|
103 |
+
try:
|
104 |
+
quiz_data = {
|
105 |
+
"user_id": user_id,
|
106 |
+
"course_id": course_id,
|
107 |
+
"session_id": session_id,
|
108 |
+
"title": title,
|
109 |
+
"questions": questions,
|
110 |
+
"created_at": datetime.utcnow(),
|
111 |
+
"status": "active",
|
112 |
+
"submissions": []
|
113 |
+
}
|
114 |
+
result = quizzes_collection.insert_one(quiz_data)
|
115 |
+
return result.inserted_id
|
116 |
+
except Exception as e:
|
117 |
+
print(f"Error saving quiz: {e}")
|
118 |
+
return None
|
119 |
+
|
120 |
+
|
121 |
+
def get_student_quiz_score(quiz_id, student_id):
|
122 |
+
"""Get student's score for a specific quiz"""
|
123 |
+
quiz = quizzes_collection.find_one(
|
124 |
+
{
|
125 |
+
"_id": quiz_id,
|
126 |
+
"submissions.student_id": student_id
|
127 |
+
},
|
128 |
+
{"submissions.$": 1}
|
129 |
+
)
|
130 |
+
if quiz and quiz.get('submissions'):
|
131 |
+
return quiz['submissions'][0].get('score')
|
132 |
+
return None
|
133 |
+
|
134 |
+
# def submit_quiz_answers(quiz_id, student_id, student_answers):
|
135 |
+
# """Submit and score student's quiz answers"""
|
136 |
+
# quiz = quizzes_collection.find_one({"_id": quiz_id})
|
137 |
+
# if not quiz:
|
138 |
+
# return None
|
139 |
+
|
140 |
+
# # Calculate score
|
141 |
+
# correct_answers = 0
|
142 |
+
# total_questions = len(quiz['questions'])
|
143 |
+
|
144 |
+
# for q_idx, question in enumerate(quiz['questions']):
|
145 |
+
# if student_answers.get(str(q_idx)) == question['correct_option']:
|
146 |
+
# correct_answers += 1
|
147 |
+
|
148 |
+
# score = (correct_answers / total_questions) * 100
|
149 |
+
|
150 |
+
# # Store submission
|
151 |
+
# submission_data = {
|
152 |
+
# "student_id": student_id,
|
153 |
+
# "answers": student_answers,
|
154 |
+
# "score": score,
|
155 |
+
# "submitted_at": datetime.utcnow()
|
156 |
+
# }
|
157 |
+
|
158 |
+
# # Update quiz with submission
|
159 |
+
# quizzes_collection.update_one(
|
160 |
+
# {"_id": quiz_id},
|
161 |
+
# {
|
162 |
+
# "$push": {"submissions": submission_data}
|
163 |
+
# }
|
164 |
+
# )
|
165 |
+
|
166 |
+
# return score
|
167 |
+
def submit_quiz_answers(quiz_id, student_id, student_answers):
|
168 |
+
"""Submit and score student's quiz answers"""
|
169 |
+
try:
|
170 |
+
quiz = quizzes_collection.find_one({"_id": quiz_id})
|
171 |
+
if not quiz:
|
172 |
+
return None
|
173 |
+
|
174 |
+
# Calculate score
|
175 |
+
correct_answers = 0
|
176 |
+
total_questions = len(quiz['questions'])
|
177 |
+
|
178 |
+
for q_idx, question in enumerate(quiz['questions']):
|
179 |
+
student_answer = student_answers.get(str(q_idx))
|
180 |
+
if student_answer: # Only check if answer was provided
|
181 |
+
# Extract the option letter (A, B, C, D) from the full answer string
|
182 |
+
answer_letter = student_answer.split(')')[0].strip()
|
183 |
+
if answer_letter == question['correct_option']:
|
184 |
+
correct_answers += 1
|
185 |
+
|
186 |
+
score = (correct_answers / total_questions) * 100
|
187 |
+
|
188 |
+
# Store submission
|
189 |
+
submission_data = {
|
190 |
+
"student_id": student_id,
|
191 |
+
"answers": student_answers,
|
192 |
+
"score": score,
|
193 |
+
"submitted_at": datetime.utcnow()
|
194 |
+
}
|
195 |
+
|
196 |
+
# Update quiz with submission
|
197 |
+
result = quizzes_collection.update_one(
|
198 |
+
{"_id": quiz_id},
|
199 |
+
{"$push": {"submissions": submission_data}}
|
200 |
+
)
|
201 |
+
|
202 |
+
return score if result.modified_count > 0 else None
|
203 |
+
|
204 |
+
except Exception as e:
|
205 |
+
print(f"Error submitting quiz: {e}")
|
206 |
+
return None
|
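A minimal end-to-end sketch of the quiz helpers above; the course, session, user and student IDs are illustrative placeholders, and in the app they come from the logged-in user and the selected session.

    # Hedged sketch: generate questions without extra context, save the quiz, then grade one submission.
    questions = generate_mcqs(None, 5, "Control Flow and Functions", "Loops and function definitions in Python")
    if questions:
        quiz_id = save_quiz("CS101", "S102", "Control Flow Quiz", questions, user_id="F101")
        score = submit_quiz_answers(quiz_id, "SID001", {"0": "A) option1"})  # placeholder answer
        print(score)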
goals2.py
ADDED
@@ -0,0 +1,658 @@
1 |
+
import streamlit as st
|
2 |
+
from typing import List, Dict
|
3 |
+
import httpx
|
4 |
+
from pathlib import Path
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import json
|
8 |
+
import numpy as np
|
9 |
+
from pymongo import MongoClient
|
10 |
+
from openai import OpenAI
|
11 |
+
from datetime import datetime
|
12 |
+
import asyncio
|
13 |
+
import pandas as pd
|
14 |
+
|
15 |
+
# Load environment variables
|
16 |
+
load_dotenv()
|
17 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
|
18 |
+
MONGODB_URI = os.getenv("MONGO_URI")
|
19 |
+
OPENAI_API_KEY = os.getenv("OPENAI_KEY")
|
20 |
+
|
21 |
+
# Initialize MongoDB client
|
22 |
+
client = MongoClient(MONGODB_URI)
|
23 |
+
db = client["document_analysis"]
|
24 |
+
vectors_collection = db["document_vectors"]
|
25 |
+
|
26 |
+
# Initialize OpenAI client
|
27 |
+
openai_client = OpenAI(api_key=OPENAI_API_KEY)
|
28 |
+
|
29 |
+
|
30 |
+
class GoalAnalyzer:
|
31 |
+
def __init__(self):
|
32 |
+
self.api_key = PERPLEXITY_API_KEY
|
33 |
+
self.base_url = "https://api.perplexity.ai/chat/completions"
|
34 |
+
|
35 |
+
def clean_json_string(self, content: str) -> str:
|
36 |
+
"""Clean and extract valid JSON from string"""
|
37 |
+
# Remove markdown formatting
|
38 |
+
if "```json" in content:
|
39 |
+
content = content.split("```json")[1].split("```")[0]
|
40 |
+
elif "```" in content:
|
41 |
+
content = content.split("```")[1]
|
42 |
+
|
43 |
+
# Find the JSON object boundaries
|
44 |
+
start_idx = content.find("{")
|
45 |
+
end_idx = content.rfind("}") + 1
|
46 |
+
|
47 |
+
if start_idx != -1 and end_idx > 0:
|
48 |
+
content = content[start_idx:end_idx]
|
49 |
+
|
50 |
+
# Clean up common issues
|
51 |
+
content = content.strip()
|
52 |
+
content = content.replace("\n", "")
|
53 |
+
content = content.replace("'", '"')
|
54 |
+
|
55 |
+
return content
|
56 |
+
|
57 |
+
async def get_perplexity_analysis(self, text: str, goal: str) -> Dict:
|
58 |
+
"""Get analysis from Perplexity API"""
|
59 |
+
headers = {
|
60 |
+
"Authorization": f"Bearer {self.api_key}",
|
61 |
+
"Content-Type": "application/json",
|
62 |
+
}
|
63 |
+
|
64 |
+
prompt = f"""
|
65 |
+
Analyze the following text in context of the goal: {goal}
|
66 |
+
|
67 |
+
Text: {text}
|
68 |
+
|
69 |
+
Provide analysis in the following JSON format:
|
70 |
+
{{
|
71 |
+
"themes": ["theme1", "theme2"],
|
72 |
+
"subthemes": {{"theme1": ["subtheme1", "subtheme2"], "theme2": ["subtheme3"]}},
|
73 |
+
"keywords": ["keyword1", "keyword2"],
|
74 |
+
"relevance_score": 0-100
|
75 |
+
}}
|
76 |
+
"""
|
77 |
+
|
78 |
+
try:
|
79 |
+
async with httpx.AsyncClient() as client:
|
80 |
+
payload = {
|
81 |
+
"model": "llama-3.1-sonar-small-128k-chat", # Updated to supported model
|
82 |
+
"messages": [
|
83 |
+
{
|
84 |
+
"role": "system",
|
85 |
+
"content": "You are an AI assistant that analyzes documents and provides structured analysis.",
|
86 |
+
},
|
87 |
+
{"role": "user", "content": prompt},
|
88 |
+
],
|
89 |
+
"max_tokens": 1024,
|
90 |
+
}
|
91 |
+
|
92 |
+
# Debug info using expander
|
93 |
+
with st.expander("Debug Info", expanded=False):
|
94 |
+
st.write("Request payload:", payload)
|
95 |
+
|
96 |
+
response = await client.post(
|
97 |
+
self.base_url, headers=headers, json=payload, timeout=30.0
|
98 |
+
)
|
99 |
+
|
100 |
+
# Debug response info
|
101 |
+
with st.expander("Response Info", expanded=False):
|
102 |
+
st.write("Response status:", response.status_code)
|
103 |
+
st.write("Response headers:", dict(response.headers))
|
104 |
+
st.write("Response content:", response.text)
|
105 |
+
|
106 |
+
if response.status_code != 200:
|
107 |
+
error_detail = (
|
108 |
+
response.json() if response.content else "No error details"
|
109 |
+
)
|
110 |
+
raise Exception(
|
111 |
+
f"API returned status code {response.status_code}. Details: {error_detail}"
|
112 |
+
)
|
113 |
+
|
114 |
+
result = response.json()
|
115 |
+
content = (
|
116 |
+
result.get("choices", [{}])[0].get("message", {}).get("content", "")
|
117 |
+
)
|
118 |
+
|
119 |
+
# Clean and parse JSON
|
120 |
+
cleaned_content = self.clean_json_string(content)
|
121 |
+
|
122 |
+
try:
|
123 |
+
analysis = json.loads(cleaned_content)
|
124 |
+
|
125 |
+
# Validate required fields
|
126 |
+
required_fields = [
|
127 |
+
"themes",
|
128 |
+
"subthemes",
|
129 |
+
"keywords",
|
130 |
+
"relevance_score",
|
131 |
+
]
|
132 |
+
for field in required_fields:
|
133 |
+
if field not in analysis:
|
134 |
+
analysis[field] = [] if field != "relevance_score" else 0
|
135 |
+
|
136 |
+
return analysis
|
137 |
+
|
138 |
+
except json.JSONDecodeError as e:
|
139 |
+
st.error(f"JSON parsing error: {str(e)}")
|
140 |
+
st.error(f"Failed content: {cleaned_content}")
|
141 |
+
return {
|
142 |
+
"themes": ["Error parsing themes"],
|
143 |
+
"subthemes": {"Error": ["Failed to parse subthemes"]},
|
144 |
+
"keywords": ["parsing-error"],
|
145 |
+
"relevance_score": 0,
|
146 |
+
}
|
147 |
+
|
148 |
+
except Exception as e:
|
149 |
+
st.error(f"API Error: {str(e)}")
|
150 |
+
return None
|
151 |
+
|
152 |
+
def extract_text_from_file(self, file) -> str:
|
153 |
+
"""Extract text content from uploaded file"""
|
154 |
+
try:
|
155 |
+
text = ""
|
156 |
+
file_type = file.type
|
157 |
+
|
158 |
+
if file_type == "text/plain":
|
159 |
+
text = file.getvalue().decode("utf-8")
|
160 |
+
elif file_type == "application/pdf":
|
161 |
+
import PyPDF2
|
162 |
+
|
163 |
+
pdf_reader = PyPDF2.PdfReader(file)
|
164 |
+
for page in pdf_reader.pages:
|
165 |
+
text += page.extract_text()
|
166 |
+
elif (
|
167 |
+
file_type
|
168 |
+
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
169 |
+
):
|
170 |
+
import docx
|
171 |
+
|
172 |
+
doc = docx.Document(file)
|
173 |
+
text = " ".join([paragraph.text for paragraph in doc.paragraphs])
|
174 |
+
|
175 |
+
return text
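# Supported uploads: .txt (UTF-8), .pdf (via PyPDF2), .docx (via python-docx);
# any other file type falls through and returns an empty string.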
|
176 |
+
except Exception as e:
|
177 |
+
st.error(f"Error extracting text: {str(e)}")
|
178 |
+
return ""
|
179 |
+
|
180 |
+
|
181 |
+
class DocumentVectorizer:
|
182 |
+
def __init__(self):
|
183 |
+
self.model = "text-embedding-ada-002"
|
184 |
+
self.client = MongoClient(MONGODB_URI)
|
185 |
+
self.db = self.client["document_analysis"]
|
186 |
+
self.vectors_collection = self.db["document_vectors"]
|
187 |
+
|
# NOTE: create_index() cannot build a 1536-dimension vector index, and Atlas
# Vector Search indexes are managed outside this call, so only the document
# name is indexed here (used for upserts and lookups); cosine similarity is
# computed client-side in vector_search().
try:
self.vectors_collection.create_index("name", unique=True)
except Exception:
st.warning("Index on 'name' may already exist")
201 |
+
|
202 |
+
def get_embedding(self, text: str) -> list:
|
203 |
+
"""Get embedding vector for text using OpenAI"""
|
204 |
+
try:
|
205 |
+
response = openai_client.embeddings.create(model=self.model, input=text)
|
206 |
+
return response.data[0].embedding
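# text-embedding-ada-002 returns a 1536-dimensional vector; very long inputs may
# need truncating to stay within the embedding model's token limit.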
|
207 |
+
except Exception as e:
|
208 |
+
st.error(f"Error getting embedding: {str(e)}")
|
209 |
+
return None
|
210 |
+
|
211 |
+
# Add this method to DocumentVectorizer class
|
212 |
+
def vector_exists(self, doc_name: str) -> bool:
|
213 |
+
"""Check if vector exists for document"""
|
214 |
+
return self.vectors_collection.count_documents({"name": doc_name}) > 0
|
215 |
+
|
216 |
+
# Update store_vector method in DocumentVectorizer class
|
217 |
+
def store_vector(self, doc_name: str, vector: list, text: str, goal: str = None):
|
218 |
+
"""Store document/goal vector in MongoDB using upsert"""
|
219 |
+
try:
|
220 |
+
vector_doc = {
|
221 |
+
"name": doc_name,
|
222 |
+
"vector": vector,
|
223 |
+
"text": text,
|
224 |
+
"type": "document" if goal is None else "goal",
|
225 |
+
"goal": goal,
|
226 |
+
"updated_at": datetime.utcnow(),
|
227 |
+
}
|
228 |
+
|
229 |
+
# Use update_one with upsert
|
230 |
+
self.vectors_collection.update_one(
|
231 |
+
{"name": doc_name},
|
232 |
+
{"$set": vector_doc, "$setOnInsert": {"created_at": datetime.utcnow()}},
|
233 |
+
upsert=True,
|
234 |
+
)
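# upsert=True inserts the document the first time a name is seen and overwrites
# the $set fields on later runs; created_at is only written on the initial insert.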
|
235 |
+
|
236 |
+
except Exception as e:
|
237 |
+
st.error(f"Error storing vector: {str(e)}")
|
238 |
+
|
239 |
+
# Update vector_search method in DocumentVectorizer class
|
240 |
+
def vector_search(self, query_vector: List[float], limit: int = 5) -> List[Dict]:
|
241 |
+
"""Search for similar documents using vector similarity"""
|
242 |
+
try:
|
243 |
+
# Get all documents
|
244 |
+
documents = list(self.vectors_collection.find({"type": "document"}))
|
245 |
+
|
246 |
+
# Calculate similarities
|
247 |
+
similarities = []
|
248 |
+
for doc in documents:
|
249 |
+
similarity = self.calculate_similarity(query_vector, doc["vector"])
|
250 |
+
similarities.append(
|
251 |
+
{
|
252 |
+
"name": doc["name"],
|
253 |
+
"text": doc["text"],
|
254 |
+
"similarity": similarity, # Keep as float
|
255 |
+
"similarity_display": f"{similarity*100:.1f}%", # Add display version
|
256 |
+
}
|
257 |
+
)
|
258 |
+
|
259 |
+
# Sort by similarity and get top k
|
260 |
+
sorted_docs = sorted(
|
261 |
+
similarities,
|
262 |
+
key=lambda x: x["similarity"], # Sort by float value
|
263 |
+
reverse=True,
|
264 |
+
)[:limit]
|
265 |
+
|
266 |
+
return sorted_docs
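# This is a full client-side scan (one cosine similarity per stored document);
# fine for small collections, but a server-side vector index would be needed at scale.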
|
267 |
+
|
268 |
+
except Exception as e:
|
269 |
+
st.error(f"Vector search error: {str(e)}")
|
270 |
+
return []
|
271 |
+
|
272 |
+
def find_similar_documents(self, text: str, limit: int = 5) -> List[Dict]:
|
273 |
+
"""Find similar documents for given text"""
|
274 |
+
vector = self.get_embedding(text)
|
275 |
+
if vector:
|
276 |
+
return self.vector_search(vector, limit)
|
277 |
+
return []
|
278 |
+
|
279 |
+
def calculate_similarity(self, vector1: list, vector2: list) -> float:
|
280 |
+
"""Calculate cosine similarity between two vectors"""
|
281 |
+
return np.dot(vector1, vector2) / (
|
282 |
+
np.linalg.norm(vector1) * np.linalg.norm(vector2)
|
283 |
+
)
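# Cosine similarity ranges from -1 to 1: identical directions give 1.0, orthogonal
# vectors give 0.0. A zero-norm vector (e.g. empty text) would cause a divide-by-zero here.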
|
284 |
+
|
285 |
+
|
286 |
+
def display_analysis_results(analysis: Dict):
|
287 |
+
"""Display analysis results in Streamlit UI"""
|
288 |
+
if not analysis:
|
289 |
+
return
|
290 |
+
|
291 |
+
# Display Themes
|
292 |
+
st.subheader("Themes")
|
293 |
+
for theme in analysis.get("themes", []):
|
294 |
+
with st.expander(f"🎯 {theme}"):
|
295 |
+
# Display subthemes for this theme
|
296 |
+
subthemes = analysis.get("subthemes", {}).get(theme, [])
|
297 |
+
if subthemes:
|
298 |
+
st.write("**Subthemes:**")
|
299 |
+
for subtheme in subthemes:
|
300 |
+
st.write(f"- {subtheme}")
|
301 |
+
|
302 |
+
# Display Keywords
|
303 |
+
st.subheader("Keywords")
|
304 |
+
keywords = analysis.get("keywords", [])
|
305 |
+
st.write(" | ".join([f"🔑 {keyword}" for keyword in keywords]))
|
306 |
+
|
307 |
+
# Display Relevance Score
|
308 |
+
score = analysis.get("relevance_score", 0)
|
309 |
+
st.metric("Relevance Score", f"{score}%")
|
310 |
+
|
311 |
+
|
312 |
+
def display_analyst_dashboard():
|
313 |
+
st.title("Multi-Goal Document Analysis")
|
314 |
+
|
315 |
+
with st.sidebar:
|
316 |
+
st.markdown("### Input Section")
|
317 |
+
tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
|
318 |
+
# tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
|
319 |
+
|
320 |
+
with tab1:
|
321 |
+
# Multiple goals input
|
322 |
+
num_goals = st.number_input("Number of goals:", min_value=1, value=1)
|
323 |
+
goals = []
|
324 |
+
for i in range(num_goals):
|
325 |
+
goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
|
326 |
+
if goal:
|
327 |
+
goals.append(goal)
|
328 |
+
|
329 |
+
uploaded_files = st.file_uploader(
|
330 |
+
"Upload documents",
|
331 |
+
accept_multiple_files=True,
|
332 |
+
type=["txt", "pdf", "docx"],
|
333 |
+
)
|
334 |
+
analyze_button = (
|
335 |
+
st.button("Analyze Documents") if goals and uploaded_files else None
|
336 |
+
)
|
337 |
+
|
338 |
+
with tab2:
|
339 |
+
# Keep existing similarity search tab
|
340 |
+
search_text = st.text_area("Enter text to find similar documents:")
|
341 |
+
search_limit = st.slider("Number of results", 1, 10, 5)
|
342 |
+
search_button = st.button("Search Similar") if search_text else None
|
343 |
+
|
344 |
+
if st.button("Logout", use_container_width=True):
|
345 |
+
for key in st.session_state.keys():
|
346 |
+
del st.session_state[key]
|
347 |
+
st.rerun()
|
348 |
+
|
349 |
+
if analyze_button:
|
350 |
+
analyzer = GoalAnalyzer()
|
351 |
+
vectorizer = DocumentVectorizer()
|
352 |
+
|
353 |
+
# Store vectors
|
354 |
+
doc_vectors = {}
|
355 |
+
goal_vectors = {}
|
356 |
+
|
357 |
+
# Process goals first
|
358 |
+
with st.spinner("Processing goals..."):
|
359 |
+
for i, goal in enumerate(goals):
|
360 |
+
vector = vectorizer.get_embedding(goal)
|
361 |
+
if vector:
|
362 |
+
goal_vectors[f"Goal {i+1}"] = vector
|
363 |
+
vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)
|
364 |
+
|
365 |
+
# Process documents
|
366 |
+
with st.spinner("Processing documents..."):
|
367 |
+
for file in uploaded_files:
|
368 |
+
st.markdown(f"### Analysis for {file.name}")
|
369 |
+
|
370 |
+
if vectorizer.vector_exists(file.name):
|
371 |
+
st.info(f"Vector already exists for {file.name}")
|
372 |
+
existing_doc = vectorizer.vectors_collection.find_one(
|
373 |
+
{"name": file.name}
|
374 |
+
)
|
375 |
+
doc_vectors[file.name] = existing_doc["vector"]
# reuse the stored text so the combined-goal analysis below has content for this file
text = existing_doc["text"]
|
376 |
+
else:
|
377 |
+
text = analyzer.extract_text_from_file(file)
|
378 |
+
if not text:
|
379 |
+
st.warning(f"Could not extract text from {file.name}")
|
380 |
+
continue
|
381 |
+
|
382 |
+
vector = vectorizer.get_embedding(text)
|
383 |
+
if vector:
|
384 |
+
doc_vectors[file.name] = vector
|
385 |
+
vectorizer.store_vector(file.name, vector, text)
|
386 |
+
|
387 |
+
# Display goal similarities
|
388 |
+
st.subheader("Goal Relevance Scores")
|
389 |
+
col1, col2 = st.columns([1, 2])
|
390 |
+
|
391 |
+
with col1:
|
392 |
+
for goal_name, goal_vector in goal_vectors.items():
|
393 |
+
similarity = (
|
394 |
+
vectorizer.calculate_similarity(
|
395 |
+
doc_vectors[file.name], goal_vector
|
396 |
+
)
|
397 |
+
* 100
|
398 |
+
)
|
399 |
+
st.metric(f"{goal_name}", f"{similarity:.1f}%")
|
400 |
+
|
401 |
+
with col2:
|
402 |
+
# Get analysis for all goals combined
|
403 |
+
analysis = asyncio.run(
|
404 |
+
analyzer.get_perplexity_analysis(text, " | ".join(goals))
|
405 |
+
)
|
406 |
+
display_analysis_results(analysis)
|
407 |
+
|
408 |
+
st.divider()
|
409 |
+
|
410 |
+
# Document similarity matrix
|
411 |
+
if len(doc_vectors) > 1:
|
412 |
+
st.markdown("### Document Similarity Matrix")
|
413 |
+
files = list(doc_vectors.keys())
|
414 |
+
similarity_matrix = []
|
415 |
+
|
416 |
+
for file1 in files:
|
417 |
+
row = []
|
418 |
+
for file2 in files:
|
419 |
+
similarity = vectorizer.calculate_similarity(
|
420 |
+
doc_vectors[file1], doc_vectors[file2]
|
421 |
+
)
|
422 |
+
row.append(similarity)
|
423 |
+
similarity_matrix.append(row)
|
424 |
+
|
425 |
+
df = pd.DataFrame(similarity_matrix, columns=files, index=files)
|
426 |
+
st.dataframe(df.style.background_gradient(cmap="RdYlGn"))
|
427 |
+
|
428 |
+
# Add goal-document similarity matrix
|
429 |
+
st.markdown("### Goal-Document Similarity Matrix")
|
430 |
+
goal_doc_matrix = []
|
431 |
+
goal_names = list(goal_vectors.keys())
|
432 |
+
|
433 |
+
for file in files:
|
434 |
+
row = []
|
435 |
+
for goal in goal_names:
|
436 |
+
similarity = vectorizer.calculate_similarity(
|
437 |
+
doc_vectors[file], goal_vectors[goal]
|
438 |
+
)
|
439 |
+
row.append(similarity)
|
440 |
+
goal_doc_matrix.append(row)
|
441 |
+
|
442 |
+
df_goals = pd.DataFrame(
|
443 |
+
goal_doc_matrix, columns=goal_names, index=files
|
444 |
+
)
|
445 |
+
st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))
|
446 |
+
|
447 |
+
# Keep existing similarity search functionality
|
448 |
+
elif search_button:
|
449 |
+
vectorizer = DocumentVectorizer()
|
450 |
+
with st.spinner("Searching similar documents..."):
|
451 |
+
query_vector = vectorizer.get_embedding(search_text)
|
452 |
+
if query_vector:
|
453 |
+
similar_docs = vectorizer.vector_search(query_vector, search_limit)
|
454 |
+
|
455 |
+
if similar_docs:
|
456 |
+
st.markdown("### Similar Documents Found")
|
457 |
+
|
458 |
+
# Create DataFrame with numeric similarities
|
459 |
+
df = pd.DataFrame(similar_docs)
|
460 |
+
|
461 |
+
# Apply gradient to numeric column
|
462 |
+
styled_df = df[["name", "similarity"]].style.background_gradient(
|
463 |
+
cmap="RdYlGn", subset=["similarity"]
|
464 |
+
)
|
465 |
+
|
466 |
+
# Format display after styling
|
467 |
+
styled_df = styled_df.format({"similarity": "{:.1%}"})
|
468 |
+
|
469 |
+
st.dataframe(styled_df)
|
470 |
+
|
471 |
+
# Show document contents
|
472 |
+
for doc in similar_docs:
|
473 |
+
with st.expander(
|
474 |
+
f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
|
475 |
+
):
|
476 |
+
st.text(
|
477 |
+
doc["text"][:20] + "..."
|
478 |
+
if len(doc["text"]) > 20
|
479 |
+
else doc["text"]
|
480 |
+
)
|
481 |
+
else:
|
482 |
+
st.info("No similar documents found")
|
483 |
+
else:
|
484 |
+
st.error("Could not process search query")
|
485 |
+
|
486 |
+
|
487 |
+
def main():
|
488 |
+
st.title("Multi-Goal Document Analysis")
|
489 |
+
|
490 |
+
with st.sidebar:
|
491 |
+
st.markdown("### Input Section")
|
492 |
+
tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
|
493 |
+
# tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
|
494 |
+
|
495 |
+
with tab1:
|
496 |
+
# Multiple goals input
|
497 |
+
num_goals = st.number_input("Number of goals:", min_value=1, value=1)
|
498 |
+
goals = []
|
499 |
+
for i in range(num_goals):
|
500 |
+
goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
|
501 |
+
if goal:
|
502 |
+
goals.append(goal)
|
503 |
+
|
504 |
+
uploaded_files = st.file_uploader(
|
505 |
+
"Upload documents",
|
506 |
+
accept_multiple_files=True,
|
507 |
+
type=["txt", "pdf", "docx"],
|
508 |
+
)
|
509 |
+
analyze_button = (
|
510 |
+
st.button("Analyze Documents") if goals and uploaded_files else None
|
511 |
+
)
|
512 |
+
|
513 |
+
with tab2:
|
514 |
+
# Keep existing similarity search tab
|
515 |
+
search_text = st.text_area("Enter text to find similar documents:")
|
516 |
+
search_limit = st.slider("Number of results", 1, 10, 5)
|
517 |
+
search_button = st.button("Search Similar") if search_text else None
|
518 |
+
|
519 |
+
if analyze_button:
|
520 |
+
analyzer = GoalAnalyzer()
|
521 |
+
vectorizer = DocumentVectorizer()
|
522 |
+
|
523 |
+
# Store vectors
|
524 |
+
doc_vectors = {}
|
525 |
+
goal_vectors = {}
|
526 |
+
|
527 |
+
# Process goals first
|
528 |
+
with st.spinner("Processing goals..."):
|
529 |
+
for i, goal in enumerate(goals):
|
530 |
+
vector = vectorizer.get_embedding(goal)
|
531 |
+
if vector:
|
532 |
+
goal_vectors[f"Goal {i+1}"] = vector
|
533 |
+
vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)
|
534 |
+
|
535 |
+
# Process documents
|
536 |
+
with st.spinner("Processing documents..."):
|
537 |
+
for file in uploaded_files:
|
538 |
+
st.markdown(f"### Analysis for {file.name}")
|
539 |
+
|
540 |
+
if vectorizer.vector_exists(file.name):
|
541 |
+
st.info(f"Vector already exists for {file.name}")
|
542 |
+
existing_doc = vectorizer.vectors_collection.find_one(
|
543 |
+
{"name": file.name}
|
544 |
+
)
|
545 |
+
doc_vectors[file.name] = existing_doc["vector"]
# reuse the stored text so the combined-goal analysis below has content for this file
text = existing_doc["text"]
|
546 |
+
else:
|
547 |
+
text = analyzer.extract_text_from_file(file)
|
548 |
+
if not text:
|
549 |
+
st.warning(f"Could not extract text from {file.name}")
|
550 |
+
continue
|
551 |
+
|
552 |
+
vector = vectorizer.get_embedding(text)
|
553 |
+
if vector:
|
554 |
+
doc_vectors[file.name] = vector
|
555 |
+
vectorizer.store_vector(file.name, vector, text)
|
556 |
+
|
557 |
+
# Display goal similarities
|
558 |
+
st.subheader("Goal Relevance Scores")
|
559 |
+
col1, col2 = st.columns([1, 2])
|
560 |
+
|
561 |
+
with col1:
|
562 |
+
for goal_name, goal_vector in goal_vectors.items():
|
563 |
+
similarity = (
|
564 |
+
vectorizer.calculate_similarity(
|
565 |
+
doc_vectors[file.name], goal_vector
|
566 |
+
)
|
567 |
+
* 100
|
568 |
+
)
|
569 |
+
st.metric(f"{goal_name}", f"{similarity:.1f}%")
|
570 |
+
|
571 |
+
with col2:
|
572 |
+
# Get analysis for all goals combined
|
573 |
+
analysis = asyncio.run(
|
574 |
+
analyzer.get_perplexity_analysis(text, " | ".join(goals))
|
575 |
+
)
|
576 |
+
display_analysis_results(analysis)
|
577 |
+
|
578 |
+
st.divider()
|
579 |
+
|
580 |
+
# Document similarity matrix
|
581 |
+
if len(doc_vectors) > 1:
|
582 |
+
st.markdown("### Document Similarity Matrix")
|
583 |
+
files = list(doc_vectors.keys())
|
584 |
+
similarity_matrix = []
|
585 |
+
|
586 |
+
for file1 in files:
|
587 |
+
row = []
|
588 |
+
for file2 in files:
|
589 |
+
similarity = vectorizer.calculate_similarity(
|
590 |
+
doc_vectors[file1], doc_vectors[file2]
|
591 |
+
)
|
592 |
+
row.append(similarity)
|
593 |
+
similarity_matrix.append(row)
|
594 |
+
|
595 |
+
df = pd.DataFrame(similarity_matrix, columns=files, index=files)
|
596 |
+
st.dataframe(df.style.background_gradient(cmap="RdYlGn"))
|
597 |
+
|
598 |
+
# Add goal-document similarity matrix
|
599 |
+
st.markdown("### Goal-Document Similarity Matrix")
|
600 |
+
goal_doc_matrix = []
|
601 |
+
goal_names = list(goal_vectors.keys())
|
602 |
+
|
603 |
+
for file in files:
|
604 |
+
row = []
|
605 |
+
for goal in goal_names:
|
606 |
+
similarity = vectorizer.calculate_similarity(
|
607 |
+
doc_vectors[file], goal_vectors[goal]
|
608 |
+
)
|
609 |
+
row.append(similarity)
|
610 |
+
goal_doc_matrix.append(row)
|
611 |
+
|
612 |
+
df_goals = pd.DataFrame(
|
613 |
+
goal_doc_matrix, columns=goal_names, index=files
|
614 |
+
)
|
615 |
+
st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))
|
616 |
+
|
617 |
+
# Keep existing similarity search functionality
|
618 |
+
elif search_button:
|
619 |
+
vectorizer = DocumentVectorizer()
|
620 |
+
with st.spinner("Searching similar documents..."):
|
621 |
+
query_vector = vectorizer.get_embedding(search_text)
|
622 |
+
if query_vector:
|
623 |
+
similar_docs = vectorizer.vector_search(query_vector, search_limit)
|
624 |
+
|
625 |
+
if similar_docs:
|
626 |
+
st.markdown("### Similar Documents Found")
|
627 |
+
|
628 |
+
# Create DataFrame with numeric similarities
|
629 |
+
df = pd.DataFrame(similar_docs)
|
630 |
+
|
631 |
+
# Apply gradient to numeric column
|
632 |
+
styled_df = df[["name", "similarity"]].style.background_gradient(
|
633 |
+
cmap="RdYlGn", subset=["similarity"]
|
634 |
+
)
|
635 |
+
|
636 |
+
# Format display after styling
|
637 |
+
styled_df = styled_df.format({"similarity": "{:.1%}"})
|
638 |
+
|
639 |
+
st.dataframe(styled_df)
|
640 |
+
|
641 |
+
# Show document contents
|
642 |
+
for doc in similar_docs:
|
643 |
+
with st.expander(
|
644 |
+
f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
|
645 |
+
):
|
646 |
+
st.text(
|
647 |
+
doc["text"][:20] + "..."
|
648 |
+
if len(doc["text"]) > 20
|
649 |
+
else doc["text"]
|
650 |
+
)
|
651 |
+
else:
|
652 |
+
st.info("No similar documents found")
|
653 |
+
else:
|
654 |
+
st.error("Could not process search query")
|
655 |
+
|
656 |
+
|
657 |
+
if __name__ == "__main__":
|
658 |
+
main()
|
infranew.py
ADDED
@@ -0,0 +1,231 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import networkx as nx
|
4 |
+
from bokeh.models import HoverTool
|
5 |
+
from bokeh.plotting import figure, from_networkx
|
6 |
+
import requests
|
7 |
+
import json
|
8 |
+
import google.generativeai as genai
|
9 |
+
|
10 |
+
import os  # needed to read the key from the environment
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")  # load the key from the environment rather than hardcoding it in source
|
11 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
12 |
+
|
13 |
+
|
14 |
+
def extract_edges(keywords):
|
15 |
+
keywords = [kw.strip() for kw in keywords.split(",")]
|
16 |
+
edges = [
|
17 |
+
(keywords[i], keywords[j])
|
18 |
+
for i in range(len(keywords))
|
19 |
+
for j in range(i + 1, len(keywords))
|
20 |
+
]
|
21 |
+
return edges
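# e.g. extract_edges("graphs, embeddings, search") ->
# [("graphs", "embeddings"), ("graphs", "search"), ("embeddings", "search")]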
|
22 |
+
|
23 |
+
|
24 |
+
def create_knowledge_graph(data):
|
25 |
+
G = nx.Graph()
|
26 |
+
|
27 |
+
for _, row in data.iterrows():
|
28 |
+
words = []
|
29 |
+
for col in data.columns:
|
30 |
+
if pd.notnull(row[col]):
|
31 |
+
# Convert to string and handle numeric values
|
32 |
+
cell_value = str(row[col]).strip()
|
33 |
+
if cell_value:
|
34 |
+
words.extend(cell_value.split())
|
35 |
+
|
36 |
+
if words:
|
37 |
+
edges = extract_edges(",".join(words))
|
38 |
+
G.add_edges_from(edges)
|
39 |
+
|
40 |
+
for word in words:
|
41 |
+
word = word.strip()
|
42 |
+
if word not in G:
|
43 |
+
G.add_node(word, title=word, value=len(word))
|
44 |
+
|
45 |
+
return G
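# Every word in a row becomes a node and every pair of words in that row an edge,
# so edge count grows quadratically with row length; large CSVs may need sampling before plotting.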
|
46 |
+
|
47 |
+
|
48 |
+
def render_graph_bokeh(G):
|
49 |
+
plot = figure(
|
50 |
+
title="Interactive Knowledge Graph",
|
51 |
+
x_range=(-1.5, 1.5),
|
52 |
+
y_range=(-1.5, 1.5),
|
53 |
+
tools="pan,wheel_zoom,box_zoom,reset,tap",
|
54 |
+
active_scroll="wheel_zoom",
|
55 |
+
)
|
56 |
+
plot.add_tools(HoverTool(tooltips="@index"))
|
57 |
+
|
58 |
+
graph_renderer = from_networkx(G, nx.spring_layout, scale=1, center=(0, 0))
|
59 |
+
|
60 |
+
graph_renderer.node_renderer.glyph.size = 10
|
61 |
+
graph_renderer.node_renderer.glyph.fill_color = "blue"
|
62 |
+
graph_renderer.node_renderer.glyph.line_color = "black"
|
63 |
+
|
64 |
+
graph_renderer.edge_renderer.glyph.line_width = 1
|
65 |
+
graph_renderer.edge_renderer.glyph.line_color = "gray"
|
66 |
+
|
67 |
+
plot.renderers.append(graph_renderer)
|
68 |
+
|
69 |
+
return plot
|
70 |
+
|
71 |
+
|
72 |
+
import re
|
73 |
+
|
74 |
+
|
75 |
+
def search_papers(topic: str, num_papers: int) -> list:
|
76 |
+
headers = {
|
77 |
+
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
78 |
+
"Content-Type": "application/json",
|
79 |
+
}
|
80 |
+
|
81 |
+
prompt = f"""Find {num_papers} recent research papers about {topic}.
|
82 |
+
Return ONLY a valid JSON array with the following structure for each paper:
|
83 |
+
[
|
84 |
+
{{
|
85 |
+
"Title": "paper title",
|
86 |
+
"Abstract": "abstract text",
|
87 |
+
"Keywords": "key terms"
|
88 |
+
}}
|
89 |
+
]"""
|
90 |
+
|
91 |
+
payload = {
|
92 |
+
"model": "llama-3.1-sonar-small-128k-chat",
|
93 |
+
"messages": [
|
94 |
+
{
|
95 |
+
"role": "system",
|
96 |
+
"content": "You are a research paper analyzer that returns valid JSON arrays.",
|
97 |
+
},
|
98 |
+
{"role": "user", "content": prompt},
|
99 |
+
],
|
100 |
+
"temperature": 0.1,
|
101 |
+
}
|
102 |
+
|
103 |
+
try:
|
104 |
+
response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
|
105 |
+
response.raise_for_status()
|
106 |
+
content = response.json()["choices"][0]["message"]["content"]
|
107 |
+
|
108 |
+
# Clean response to ensure valid JSON
|
109 |
+
content = content.strip()
|
110 |
+
if not content.startswith("["):
|
111 |
+
content = content[content.find("[") :]
|
112 |
+
if not content.endswith("]"):
|
113 |
+
content = content[: content.rfind("]") + 1]
|
114 |
+
|
115 |
+
# Remove any trailing commas before closing brackets
|
116 |
+
content = re.sub(r",\s*]", "]", content)
|
117 |
+
content = re.sub(r",\s*}", "}", content)
|
118 |
+
|
119 |
+
papers = json.loads(content)
|
120 |
+
if not isinstance(papers, list):
|
121 |
+
raise ValueError("Response is not a JSON array")
|
122 |
+
return papers
|
123 |
+
except requests.exceptions.RequestException as e:
|
124 |
+
st.error(f"API Request Error: {str(e)}")
|
125 |
+
return []
|
126 |
+
except json.JSONDecodeError as e:
|
127 |
+
st.error(f"Invalid JSON response: {str(e)}")
|
128 |
+
st.error(f"Response content: {response.text}")
|
129 |
+
return []
|
130 |
+
except ValueError as e:
|
131 |
+
st.error(f"Error: {str(e)}")
|
132 |
+
return []
|
133 |
+
|
134 |
+
|
135 |
+
import os
|
136 |
+
|
137 |
+
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
138 |
+
# NOTE: no REST endpoint is needed here; call_gemini_api() below uses the google.generativeai SDK directly.
|
139 |
+
|
140 |
+
|
141 |
+
def call_gemini_api(prompt: str) -> str:
|
142 |
+
headers = {
|
143 |
+
"Authorization": f"Bearer {GEMINI_API_KEY}",
|
144 |
+
"Content-Type": "application/json",
|
145 |
+
}
|
146 |
+
|
147 |
+
payload = {
|
148 |
+
"prompt": prompt,
|
149 |
+
"max_tokens": 150,
|
150 |
+
"temperature": 0.7,
|
151 |
+
}
|
152 |
+
|
153 |
+
try:
|
154 |
+
genai.configure(api_key=GEMINI_API_KEY)  # the SDK must be configured with the key before use
model = genai.GenerativeModel("gemini-pro")
|
155 |
+
response = model.generate_content(prompt)
|
156 |
+
return response.text
|
157 |
+
except Exception as e:
|
158 |
+
st.error(f"Gemini API Error: {str(e)}")
|
159 |
+
return ""
|
160 |
+
|
161 |
+
|
162 |
+
def generate_gaps_paragraph(gaps):
|
163 |
+
prompt = f"Generate a brief paragraph about the gaps in the research based on the following gaps: {', '.join(gaps)}"
|
164 |
+
return call_gemini_api(prompt)
|
165 |
+
|
166 |
+
|
167 |
+
def generate_insights(G, topic):
|
168 |
+
papers = search_papers(topic, 5)
|
169 |
+
if papers:
|
170 |
+
st.write("### Research Insights from Perplexity API")
|
171 |
+
for paper in papers:
|
172 |
+
st.write(f"**Title:** {paper['Title']}")
|
173 |
+
st.write(f"**Abstract:** {paper['Abstract']}")
|
174 |
+
st.write(f"**Keywords:** {paper['Keywords']}")
|
175 |
+
st.write("---")
|
176 |
+
|
177 |
+
nodes = list(G.nodes(data=True))
|
178 |
+
insights = {}
|
179 |
+
insights["Strong Points"] = [
|
180 |
+
n for n, d in nodes if G.degree(n) > len(G.nodes) * 0.1
|
181 |
+
]
|
182 |
+
insights["Weak Points"] = [n for n, d in nodes if G.degree(n) < len(G.nodes) * 0.05]
|
183 |
+
insights["Gaps"] = [n for n, d in nodes if len(list(nx.neighbors(G, n))) == 0]
|
184 |
+
|
185 |
+
st.write("### Graph-Based Insights")
|
186 |
+
st.write("**Strong Points:**", insights["Strong Points"])
|
187 |
+
st.write("**Weak Points:**", insights["Weak Points"])
|
188 |
+
st.write("**Gaps:**", insights["Gaps"])
|
189 |
+
|
190 |
+
if insights["Gaps"]:
|
191 |
+
with st.spinner("Generating insights about gaps..."):
|
192 |
+
gaps_paragraph = generate_gaps_paragraph(insights["Gaps"])
|
193 |
+
if gaps_paragraph:
|
194 |
+
st.write("### Gaps in Research")
|
195 |
+
st.write(gaps_paragraph)
|
196 |
+
|
197 |
+
|
198 |
+
def main():
|
199 |
+
st.title("Advanced Interactive Knowledge Graph")
|
200 |
+
st.write(
|
201 |
+
"Upload a CSV file to generate a fully interactive and insightful knowledge graph."
|
202 |
+
)
|
203 |
+
|
204 |
+
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
|
205 |
+
|
206 |
+
if uploaded_file is not None:
|
207 |
+
try:
|
208 |
+
data = pd.read_csv(uploaded_file)
|
209 |
+
st.write("Preview of the uploaded data:")
|
210 |
+
st.dataframe(data.head())
|
211 |
+
|
212 |
+
G = create_knowledge_graph(data)
|
213 |
+
|
214 |
+
st.write("Generated Knowledge Graph:")
|
215 |
+
plot = render_graph_bokeh(G)
|
216 |
+
st.bokeh_chart(plot, use_container_width=True)
|
217 |
+
|
218 |
+
topic = st.text_input(
|
219 |
+
"Enter a topic for additional insights:", "knowledge graphs"
|
220 |
+
)
|
221 |
+
if topic:
|
222 |
+
generate_insights(G, topic)
|
223 |
+
|
224 |
+
except Exception as e:
|
225 |
+
st.error(f"An error occurred while processing the file: {e}")
|
226 |
+
else:
|
227 |
+
st.info("Please upload a CSV file to get started.")
|
228 |
+
|
229 |
+
|
230 |
+
if __name__ == "__main__":
|
231 |
+
main()
|
keywords_database_download.py
ADDED
@@ -0,0 +1,104 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from pymongo import MongoClient
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
import os
|
6 |
+
import json
|
7 |
+
import re
|
8 |
+
|
9 |
+
# 1. Load environment variables
|
10 |
+
load_dotenv()
|
11 |
+
MONGODB_URI = os.getenv(
|
12 |
+
"MONGODB_UR",
|
13 |
+
"mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
|
14 |
+
)
|
15 |
+
# 2. Create MongoDB connection
|
16 |
+
client = MongoClient(MONGODB_URI)
|
17 |
+
db = client["novascholar_db"]
|
18 |
+
collection = db["research_papers"]
|
19 |
+
|
20 |
+
|
21 |
+
def convert_mixed_columns(df: pd.DataFrame) -> pd.DataFrame:
|
22 |
+
"""
|
23 |
+
Convert any columns that contain lists into comma-separated strings
|
24 |
+
in order to ensure consistent data types for CSV export.
|
25 |
+
"""
|
26 |
+
for col in df.columns:
|
27 |
+
if any(isinstance(val, list) for val in df[col].dropna()):
|
28 |
+
df[col] = df[col].apply(
|
29 |
+
lambda x: (
|
30 |
+
", ".join(map(str, x))
|
31 |
+
if isinstance(x, list)
|
32 |
+
else (str(x) if pd.notna(x) else "")
|
33 |
+
)
|
34 |
+
)
|
35 |
+
return df
|
36 |
+
|
37 |
+
|
38 |
+
def filter_and_export_collection_to_csv(keyword: str, doc_collection=None):
|
39 |
+
"""
|
40 |
+
Find documents in the given collection with a matching keyword
|
41 |
+
in the 'Keywords' field, export them to CSV, and return the DataFrame
|
42 |
+
and CSV filename.
|
43 |
+
"""
|
44 |
+
# Use the default 'research_papers' collection if none provided
|
45 |
+
if doc_collection is None:
|
46 |
+
doc_collection = collection
|
47 |
+
|
48 |
+
docs = list(doc_collection.find({"Keywords": {"$regex": keyword, "$options": "i"}}))
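# "$options": "i" makes the regex case-insensitive, and the keyword is treated as a
# substring pattern, so "learning" also matches documents keyed with "Machine Learning".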
|
49 |
+
if docs:
|
50 |
+
df = pd.DataFrame(docs)
|
51 |
+
df = convert_mixed_columns(df)
|
52 |
+
csv_filename = "papers_filtered_export.csv"
|
53 |
+
df.to_csv(csv_filename, index=False)
|
54 |
+
return df, csv_filename
|
55 |
+
else:
|
56 |
+
# Return an empty DataFrame if no documents found
|
57 |
+
return pd.DataFrame(), None
|
58 |
+
|
59 |
+
|
60 |
+
def main():
|
61 |
+
# st.set_page_config(page_title="Filter and Export Papers", layout="wide")
|
62 |
+
st.title("Filter and Export Papers by Keyword")
|
63 |
+
|
64 |
+
# Let user select the paper type
|
65 |
+
paper_type = st.selectbox(
|
66 |
+
"Select type of research paper:",
|
67 |
+
[
|
68 |
+
"Review Based Paper",
|
69 |
+
"Opinion/Perspective Based Paper",
|
70 |
+
"Empirical Research Paper",
|
71 |
+
"Research Paper (Other)",
|
72 |
+
],
|
73 |
+
)
|
74 |
+
|
75 |
+
# 5. Let user enter the keyword to filter
|
76 |
+
keyword_input = st.text_input(
|
77 |
+
"Enter the exact keyword to filter papers by 'Keywords' field:"
|
78 |
+
)
|
79 |
+
|
80 |
+
# When user clicks button, use the collection for the selected paper type
|
81 |
+
if st.button("Export Filtered Papers to CSV"):
|
82 |
+
with st.spinner("Exporting filtered documents..."):
|
83 |
+
try:
|
84 |
+
# Determine dynamic collection based on paper type
|
85 |
+
collection_name = paper_type.replace(" ", "_").lower()
|
86 |
+
doc_collection = db[collection_name]
|
87 |
+
|
88 |
+
df, csv_filename = filter_and_export_collection_to_csv(
|
89 |
+
keyword_input, doc_collection
|
90 |
+
)
|
91 |
+
if not df.empty and csv_filename:
|
92 |
+
st.success(
|
93 |
+
f"Successfully exported filtered papers to {csv_filename}!"
|
94 |
+
)
|
95 |
+
st.write("Preview of the filtered DataFrame:")
|
96 |
+
st.dataframe(df)
|
97 |
+
else:
|
98 |
+
st.warning("No matching documents found for that keyword.")
|
99 |
+
except Exception as e:
|
100 |
+
st.error(f"Error exporting filtered papers: {str(e)}")
|
101 |
+
|
102 |
+
|
103 |
+
if __name__ == "__main__":
|
104 |
+
main()
|
live_polls.py
ADDED
@@ -0,0 +1,115 @@
1 |
+
# live_poll_feature.py
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
import pandas as pd
|
5 |
+
from datetime import datetime
|
6 |
+
from poll_db_operations import PollDatabase
|
7 |
+
|
8 |
+
class LivePollFeature:
|
9 |
+
def __init__(self):
|
10 |
+
self.db = PollDatabase()
|
11 |
+
|
12 |
+
def display_faculty_interface(self, session_id):
|
13 |
+
"""Display the faculty interface for managing polls"""
|
14 |
+
st.subheader("Live Polls Management")
|
15 |
+
|
16 |
+
# Create new poll
|
17 |
+
with st.expander("Create New Poll", expanded=False):
|
18 |
+
question = st.text_input("Poll Question")
|
19 |
+
|
20 |
+
num_options = st.number_input("Number of Options",
|
21 |
+
min_value=2,
|
22 |
+
max_value=6,
|
23 |
+
value=4)
|
24 |
+
|
25 |
+
options = []
|
26 |
+
for i in range(num_options):
|
27 |
+
option = st.text_input(f"Option {i+1}",
|
28 |
+
key=f"option_{i}")
|
29 |
+
if option:
|
30 |
+
options.append(option)
|
31 |
+
|
32 |
+
if st.button("Create Poll") and question and len(options) >= 2:
|
33 |
+
self.db.create_poll(
|
34 |
+
st.session_state.selected_course,
|
35 |
+
session_id,
|
36 |
+
question,
|
37 |
+
options,
|
38 |
+
st.session_state.user_id
|
39 |
+
)
|
40 |
+
st.success("Poll created successfully!")
|
41 |
+
st.rerun()
|
42 |
+
|
43 |
+
# Display active polls
|
44 |
+
active_polls = self.db.get_active_polls(session_id)
|
45 |
+
if active_polls:
|
46 |
+
st.subheader("Active Polls")
|
47 |
+
for poll in active_polls:
|
48 |
+
with st.expander(f"Poll: {poll['question']}", expanded=True):
|
49 |
+
# Display results
|
50 |
+
self._display_poll_results(poll)
|
51 |
+
|
52 |
+
if st.button("Close Poll",
|
53 |
+
key=f"close_{str(poll['_id'])}"):
|
54 |
+
self.db.close_poll(poll['_id'])
|
55 |
+
st.success("Poll closed successfully!")
|
56 |
+
st.rerun()
|
57 |
+
|
58 |
+
def display_student_interface(self, session_id):
|
59 |
+
"""Display the student interface for participating in polls"""
|
60 |
+
st.subheader("Live Polls")
|
61 |
+
|
62 |
+
active_polls = self.db.get_active_polls(session_id)
|
63 |
+
if not active_polls:
|
64 |
+
st.info("No active polls at the moment.")
|
65 |
+
return
|
66 |
+
|
67 |
+
for poll in active_polls:
|
68 |
+
with st.expander(f"Poll: {poll['question']}", expanded=True):
|
69 |
+
selected_option = st.radio(
|
70 |
+
"Your response:",
|
71 |
+
options=poll['options'],
|
72 |
+
key=f"poll_{str(poll['_id'])}"
|
73 |
+
)
|
74 |
+
|
75 |
+
if st.button("Submit Response",
|
76 |
+
key=f"submit_{str(poll['_id'])}"):
|
77 |
+
success, message = self.db.submit_response(
|
78 |
+
poll['_id'],
|
79 |
+
st.session_state.user_id,
|
80 |
+
selected_option
|
81 |
+
)
|
82 |
+
if success:
|
83 |
+
st.success(message)
|
84 |
+
else:
|
85 |
+
st.warning(message)
|
86 |
+
st.rerun()
|
87 |
+
|
88 |
+
# self._display_poll_results(poll)
|
89 |
+
|
90 |
+
def _display_poll_results(self, poll):
|
91 |
+
"""Helper method to display poll results"""
|
92 |
+
responses_df = pd.DataFrame(
|
93 |
+
list(poll['responses'].items()),
|
94 |
+
columns=['Option', 'Votes']
|
95 |
+
)
|
96 |
+
|
97 |
+
total_votes = responses_df['Votes'].sum()
|
98 |
+
|
99 |
+
# Calculate percentages
|
100 |
+
if total_votes > 0:
|
101 |
+
responses_df['Percentage'] = (
|
102 |
+
responses_df['Votes'] / total_votes * 100
|
103 |
+
).round(1)
|
104 |
+
else:
|
105 |
+
responses_df['Percentage'] = 0
|
106 |
+
|
107 |
+
# Display metrics
|
108 |
+
st.metric("Total Responses", total_votes)
|
109 |
+
|
110 |
+
# Display charts
|
111 |
+
st.bar_chart(responses_df.set_index('Option')['Votes'])
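# The chart plots raw vote counts; the Percentage column is surfaced to faculty in the dataframe below.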
|
112 |
+
|
113 |
+
# Display detailed statistics
|
114 |
+
if st.session_state.user_type == 'faculty':
|
115 |
+
st.dataframe(responses_df)
|
loldude.py
ADDED
@@ -0,0 +1,135 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
6 |
+
import plotly.express as px
|
7 |
+
import plotly.graph_objects as go
|
8 |
+
from collections import defaultdict
|
9 |
+
|
10 |
+
def load_and_preprocess_data(uploaded_file):
|
11 |
+
"""Load and preprocess the CSV data."""
|
12 |
+
df = pd.read_csv(uploaded_file)
|
13 |
+
# Combine relevant text fields for similarity comparison
|
14 |
+
df['combined_text'] = df['Title'] + ' ' + df['Abstract'] + ' ' + df['Keywords']
|
15 |
+
return df
|
16 |
+
|
17 |
+
def calculate_similarity_matrix(df):
|
18 |
+
"""Calculate cosine similarity matrix based on combined text."""
|
19 |
+
tfidf = TfidfVectorizer(stop_words='english')
|
20 |
+
tfidf_matrix = tfidf.fit_transform(df['combined_text'])
|
21 |
+
similarity_matrix = cosine_similarity(tfidf_matrix)
|
22 |
+
return similarity_matrix
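# similarity_matrix[i][j] is the TF-IDF cosine similarity between papers i and j
# (1.0 on the diagonal, values near 0 for unrelated papers).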
|
23 |
+
|
24 |
+
def find_similar_papers(similarity_matrix, df, threshold=0.7):
|
25 |
+
"""Find pairs of papers with similarity above threshold."""
|
26 |
+
similar_pairs = []
|
27 |
+
for i in range(len(similarity_matrix)):
|
28 |
+
for j in range(i + 1, len(similarity_matrix)):
|
29 |
+
similarity = similarity_matrix[i][j]
|
30 |
+
if similarity >= threshold:
|
31 |
+
similar_pairs.append({
|
32 |
+
'Paper 1': df.iloc[i]['Title'],
|
33 |
+
'Paper 2': df.iloc[j]['Title'],
|
34 |
+
'Similarity': similarity
|
35 |
+
})
|
36 |
+
return pd.DataFrame(similar_pairs)
|
37 |
+
|
38 |
+
def find_outliers(similarity_matrix, df, threshold=0.3):
|
39 |
+
"""Find papers with low average similarity to others."""
|
40 |
+
avg_similarities = np.mean(similarity_matrix, axis=1)
|
41 |
+
outliers = []
|
42 |
+
for i, avg_sim in enumerate(avg_similarities):
|
43 |
+
if avg_sim < threshold:
|
44 |
+
outliers.append({
|
45 |
+
'Title': df.iloc[i]['Title'],
|
46 |
+
'Average Similarity': avg_sim
|
47 |
+
})
|
48 |
+
return pd.DataFrame(outliers)
|
49 |
+
|
50 |
+
def create_similarity_heatmap(similarity_matrix, df):
|
51 |
+
"""Create a heatmap of similarity matrix."""
|
52 |
+
fig = go.Figure(data=go.Heatmap(
|
53 |
+
z=similarity_matrix,
|
54 |
+
x=df['Title'],
|
55 |
+
y=df['Title'],
|
56 |
+
colorscale='Viridis'
|
57 |
+
))
|
58 |
+
fig.update_layout(
|
59 |
+
title='Paper Similarity Heatmap',
|
60 |
+
xaxis_tickangle=-45,
|
61 |
+
height=800
|
62 |
+
)
|
63 |
+
return fig
|
64 |
+
|
65 |
+
def analyze_keywords(df):
|
66 |
+
"""Analyze keyword frequency across papers."""
|
67 |
+
keyword_freq = defaultdict(int)
|
68 |
+
for keywords in df['Keywords']:
|
69 |
+
if isinstance(keywords, str):
|
70 |
+
for keyword in keywords.split(','):
|
71 |
+
keyword = keyword.strip()
|
72 |
+
keyword_freq[keyword] += 1
|
73 |
+
|
74 |
+
keyword_df = pd.DataFrame([
|
75 |
+
{'Keyword': k, 'Frequency': v}
|
76 |
+
for k, v in keyword_freq.items()
|
77 |
+
]).sort_values('Frequency', ascending=False)
|
78 |
+
|
79 |
+
return keyword_df
|
80 |
+
|
81 |
+
def main():
|
82 |
+
st.title('Research Papers Similarity Analysis')
|
83 |
+
|
84 |
+
uploaded_file = st.file_uploader("Upload your research papers CSV file", type=['csv'])
|
85 |
+
|
86 |
+
if uploaded_file is not None:
|
87 |
+
df = load_and_preprocess_data(uploaded_file)
|
88 |
+
similarity_matrix = calculate_similarity_matrix(df)
|
89 |
+
|
90 |
+
st.header('Document Similarity Analysis')
|
91 |
+
|
92 |
+
# Similarity Heatmap
|
93 |
+
st.subheader('Similarity Heatmap')
|
94 |
+
heatmap = create_similarity_heatmap(similarity_matrix, df)
|
95 |
+
st.plotly_chart(heatmap, use_container_width=True)
|
96 |
+
|
97 |
+
# Similar Papers
|
98 |
+
st.subheader('Similar Papers')
|
99 |
+
similarity_threshold = st.slider('Similarity Threshold', 0.0, 1.0, 0.7)
|
100 |
+
similar_papers = find_similar_papers(similarity_matrix, df, similarity_threshold)
|
101 |
+
if not similar_papers.empty:
|
102 |
+
st.dataframe(similar_papers)
|
103 |
+
else:
|
104 |
+
st.write("No papers found above the similarity threshold.")
|
105 |
+
|
106 |
+
# Outliers
|
107 |
+
st.subheader('Outlier Papers')
|
108 |
+
outlier_threshold = st.slider('Outlier Threshold', 0.0, 1.0, 0.3)
|
109 |
+
outliers = find_outliers(similarity_matrix, df, outlier_threshold)
|
110 |
+
if not outliers.empty:
|
111 |
+
st.dataframe(outliers)
|
112 |
+
else:
|
113 |
+
st.write("No outliers found below the threshold.")
|
114 |
+
|
115 |
+
# Keyword Analysis
|
116 |
+
st.header('Keyword Analysis')
|
117 |
+
keyword_freq = analyze_keywords(df)
|
118 |
+
if not keyword_freq.empty:
|
119 |
+
fig = px.bar(keyword_freq, x='Keyword', y='Frequency',
|
120 |
+
title='Keyword Frequency Across Papers')
|
121 |
+
fig.update_xaxes(tickangle=45)
|
122 |
+
st.plotly_chart(fig, use_container_width=True)
|
123 |
+
|
124 |
+
# Basic Statistics
|
125 |
+
st.header('Basic Statistics')
|
126 |
+
col1, col2 = st.columns(2)
|
127 |
+
with col1:
|
128 |
+
st.metric("Total Papers", len(df))
|
129 |
+
st.metric("Average Similarity", f"{np.mean(similarity_matrix):.2f}")
|
130 |
+
with col2:
|
131 |
+
st.metric("Unique Keywords", len(keyword_freq))
|
132 |
+
st.metric("Max Similarity", f"{np.max(similarity_matrix[~np.eye(similarity_matrix.shape[0], dtype=bool)]):.2f}")
|
133 |
+
|
134 |
+
if __name__ == "__main__":
|
135 |
+
main()
|
modify_schema.py
ADDED
@@ -0,0 +1,222 @@
1 |
+
from db import courses_collection2
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
import os
|
4 |
+
from pymongo import MongoClient
|
5 |
+
from datetime import datetime
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
load_dotenv()
|
10 |
+
MONGO_URI = os.getenv("MONGO_URI")
|
11 |
+
|
12 |
+
client = MongoClient(MONGO_URI)
|
13 |
+
db = client["novascholar_db"]
|
14 |
+
|
15 |
+
# Define the updated course schema
|
16 |
+
updated_course_schema = {
|
17 |
+
"bsonType": "object",
|
18 |
+
"required": [
|
19 |
+
"course_id",
|
20 |
+
"title",
|
21 |
+
"description",
|
22 |
+
"faculty",
|
23 |
+
"faculty_id",
|
24 |
+
"duration",
|
25 |
+
"created_at",
|
26 |
+
],
|
27 |
+
"properties": {
|
28 |
+
"course_id": {
|
29 |
+
"bsonType": "string",
|
30 |
+
"description": "Unique identifier for the course",
|
31 |
+
},
|
32 |
+
"title": {"bsonType": "string", "description": "Title of the course"},
|
33 |
+
"description": {
|
34 |
+
"bsonType": "string",
|
35 |
+
"description": "Description of the course",
|
36 |
+
},
|
37 |
+
"faculty": {"bsonType": "string", "description": "Name of the faculty"},
|
38 |
+
"duration": {"bsonType": "string", "description": "Duration of the course"},
|
39 |
+
"created_at": {
|
40 |
+
"bsonType": "date",
|
41 |
+
"description": "Date when the course was created",
|
42 |
+
},
|
43 |
+
"sessions": {
|
44 |
+
"bsonType": "array",
|
45 |
+
"description": "List of sessions associated with the course",
|
46 |
+
"items": {
|
47 |
+
"bsonType": "object",
|
48 |
+
"required": ["session_id", "title", "date"],
|
49 |
+
"properties": {
|
50 |
+
"session_id": {
|
51 |
+
"bsonType": "string",
|
52 |
+
"description": "Unique identifier for the session",
|
53 |
+
},
|
54 |
+
"title": {
|
55 |
+
"bsonType": "string",
|
56 |
+
"description": "Title of the session",
|
57 |
+
},
|
58 |
+
"date": {"bsonType": "date", "description": "Date of the session"},
|
59 |
+
"status": {
|
60 |
+
"bsonType": "string",
|
61 |
+
"description": "Status of the session (e.g., completed, upcoming)",
|
62 |
+
},
|
63 |
+
"created_at": {
|
64 |
+
"bsonType": "date",
|
65 |
+
"description": "Date when the session was created",
|
66 |
+
},
|
67 |
+
"pre_class": {
|
68 |
+
"bsonType": "object",
|
69 |
+
"description": "Pre-class segment data",
|
70 |
+
"properties": {
|
71 |
+
"resources": {
|
72 |
+
"bsonType": "array",
|
73 |
+
"description": "List of pre-class resources",
|
74 |
+
"items": {
|
75 |
+
"bsonType": "object",
|
76 |
+
"required": ["type", "title", "url"],
|
77 |
+
"properties": {
|
78 |
+
"type": {
|
79 |
+
"bsonType": "string",
|
80 |
+
"description": "Type of resource (e.g., pdf, video)",
|
81 |
+
},
|
82 |
+
"title": {
|
83 |
+
"bsonType": "string",
|
84 |
+
"description": "Title of the resource",
|
85 |
+
},
|
86 |
+
"url": {
|
87 |
+
"bsonType": "string",
|
88 |
+
"description": "URL of the resource",
|
89 |
+
},
|
90 |
+
"vector": {
|
91 |
+
"bsonType": "array",
|
92 |
+
"description": "Vector representation of the resource",
|
93 |
+
"items": {"bsonType": "double"},
|
94 |
+
},
|
95 |
+
},
|
96 |
+
},
|
97 |
+
},
|
98 |
+
"completion_required": {
|
99 |
+
"bsonType": "bool",
|
100 |
+
"description": "Indicates if completion of pre-class resources is required",
|
101 |
+
},
|
102 |
+
},
|
103 |
+
},
|
104 |
+
"in_class": {
|
105 |
+
"bsonType": "object",
|
106 |
+
"description": "In-class segment data",
|
107 |
+
"properties": {
|
108 |
+
"topics": {
|
109 |
+
"bsonType": "array",
|
110 |
+
"description": "List of topics covered in the session",
|
111 |
+
"items": {"bsonType": "string"},
|
112 |
+
},
|
113 |
+
"quiz": {
|
114 |
+
"bsonType": "object",
|
115 |
+
"description": "Quiz data",
|
116 |
+
"properties": {
|
117 |
+
"title": {
|
118 |
+
"bsonType": "string",
|
119 |
+
"description": "Title of the quiz",
|
120 |
+
},
|
121 |
+
"questions": {
|
122 |
+
"bsonType": "int",
|
123 |
+
"description": "Number of questions in the quiz",
|
124 |
+
},
|
125 |
+
"duration": {
|
126 |
+
"bsonType": "int",
|
127 |
+
"description": "Duration of the quiz in minutes",
|
128 |
+
},
|
129 |
+
},
|
130 |
+
},
|
131 |
+
"polls": {
|
132 |
+
"bsonType": "array",
|
133 |
+
"description": "List of polls conducted during the session",
|
134 |
+
"items": {
|
135 |
+
"bsonType": "object",
|
136 |
+
"required": ["question", "options"],
|
137 |
+
"properties": {
|
138 |
+
"question": {
|
139 |
+
"bsonType": "string",
|
140 |
+
"description": "Poll question",
|
141 |
+
},
|
142 |
+
"options": {
|
143 |
+
"bsonType": "array",
|
144 |
+
"description": "List of poll options",
|
145 |
+
"items": {"bsonType": "string"},
|
146 |
+
},
|
147 |
+
"responses": {
|
148 |
+
"bsonType": "object",
|
149 |
+
"description": "Responses to the poll",
|
150 |
+
"additionalProperties": {"bsonType": "int"},
|
151 |
+
},
|
152 |
+
},
|
153 |
+
},
|
154 |
+
},
|
155 |
+
},
|
156 |
+
},
|
157 |
+
"post_class": {
|
158 |
+
"bsonType": "object",
|
159 |
+
"description": "Post-class segment data",
|
160 |
+
"properties": {
|
161 |
+
"assignments": {
|
162 |
+
"bsonType": "array",
|
163 |
+
"description": "List of assignments",
|
164 |
+
"items": {
|
165 |
+
"bsonType": "object",
|
166 |
+
"required": ["id", "title", "due_date", "status"],
|
167 |
+
"properties": {
|
168 |
+
"id": {
|
169 |
+
"bsonType": ["objectId", "int"],
|
170 |
+
"description": "Assignment ID",
|
171 |
+
},
|
172 |
+
"title": {
|
173 |
+
"bsonType": "string",
|
174 |
+
"description": "Title of the assignment",
|
175 |
+
},
|
176 |
+
"due_date": {
|
177 |
+
"bsonType": "date",
|
178 |
+
"description": "Due date of the assignment",
|
179 |
+
},
|
180 |
+
"status": {
|
181 |
+
"bsonType": "string",
|
182 |
+
"description": "Status of the assignment (e.g., pending, completed)",
|
183 |
+
},
|
184 |
+
"submissions": {
|
185 |
+
"bsonType": "array",
|
186 |
+
"description": "List of submissions",
|
187 |
+
"items": {
|
188 |
+
"bsonType": "object",
|
189 |
+
"properties": {
|
190 |
+
"student_id": {
|
191 |
+
"bsonType": "objectId",
|
192 |
+
"description": "ID of the student who submitted the assignment",
|
193 |
+
},
|
194 |
+
"file_url": {
|
195 |
+
"bsonType": "string",
|
196 |
+
"description": "URL of the submitted file",
|
197 |
+
},
|
198 |
+
"submitted_at": {
|
199 |
+
"bsonType": "date",
|
200 |
+
"description": "Date when the assignment was submitted",
|
201 |
+
},
|
202 |
+
},
|
203 |
+
},
|
204 |
+
},
|
205 |
+
},
|
206 |
+
},
|
207 |
+
}
|
208 |
+
},
|
209 |
+
},
|
210 |
+
},
|
211 |
+
},
|
212 |
+
},
|
213 |
+
},
|
214 |
+
}
|
215 |
+
|
216 |
+
# Update the schema using the collMod command
|
217 |
+
db.command({
|
218 |
+
"collMod": "courses_collection2",
|
219 |
+
"validator": {"$jsonSchema": updated_course_schema}
|
220 |
+
})
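# collMod swaps the validator in place; by default it applies to future inserts and
# updates and does not re-validate documents already in the collection.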
|
221 |
+
|
222 |
+
print("Schema updated successfully!")
|
new_keywords.py
ADDED
@@ -0,0 +1,127 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from pymongo import MongoClient
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
import os
|
6 |
+
import json
|
7 |
+
import re
|
8 |
+
|
9 |
+
# 1. Load environment variables
|
10 |
+
load_dotenv()
|
11 |
+
MONGODB_URI = os.getenv(
|
12 |
+
"MONGODB_UR",
|
13 |
+
"mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
|
14 |
+
)
|
15 |
+
# 2. Create MongoDB connection
|
16 |
+
client = MongoClient(MONGODB_URI)
|
17 |
+
db = client["novascholar_db"]
|
18 |
+
collection = db["research_papers"]
|
19 |
+
|
20 |
+
|
21 |
+
def convert_mixed_columns(df: pd.DataFrame) -> pd.DataFrame:
|
22 |
+
"""
|
23 |
+
Convert any columns that contain lists into comma-separated strings
|
24 |
+
to ensure consistent data types for CSV export.
|
25 |
+
"""
|
26 |
+
for col in df.columns:
|
27 |
+
if any(isinstance(val, list) for val in df[col].dropna()):
|
28 |
+
df[col] = df[col].apply(
|
29 |
+
lambda x: (
|
30 |
+
", ".join(map(str, x))
|
31 |
+
if isinstance(x, list)
|
32 |
+
else (str(x) if pd.notna(x) else "")
|
33 |
+
)
|
34 |
+
)
|
35 |
+
return df
|
36 |
+
|
37 |
+
|
38 |
+
def filter_and_export_collection_to_csv(keywords_list, doc_collection):
|
39 |
+
"""
|
40 |
+
Fetch documents from the specified collection where the 'Keywords' field
|
41 |
+
matches ANY of the keywords in 'keywords_list'. Convert to DataFrame,
|
42 |
+
ensure consistent column types, save to CSV, and return the DataFrame
|
43 |
+
and CSV filename.
|
44 |
+
"""
|
45 |
+
# 3. Retrieve filtered documents from the collection based on 'Keywords' using $in with regex for substring matching
|
46 |
+
regex_keywords = [f".*{keyword}.*" for keyword in keywords_list]
|
47 |
+
docs = list(
|
48 |
+
doc_collection.find(
|
49 |
+
{"Keywords": {"$regex": "|".join(regex_keywords), "$options": "i"}}
|
50 |
+
)
|
51 |
+
)
|
52 |
+
|
53 |
+
# Convert documents to DataFrame
|
54 |
+
df = pd.DataFrame(docs)
|
55 |
+
|
56 |
+
if not df.empty:
|
57 |
+
# 4. Convert mixed columns
|
58 |
+
df = convert_mixed_columns(df)
|
59 |
+
# 5. Export to CSV
|
60 |
+
csv_filename = "filtered_papers_export.csv"
|
61 |
+
df.to_csv(csv_filename, index=False)
|
62 |
+
return df, csv_filename
|
63 |
+
else:
|
64 |
+
# Return an empty DataFrame and None if no documents found
|
65 |
+
return pd.DataFrame(), None
|
66 |
+
|
67 |
+
|
68 |
+
def main():
|
69 |
+
st.title("Filter and Export Papers by Keyword")
|
70 |
+
|
71 |
+
# Let user select the paper type
|
72 |
+
paper_type = st.selectbox(
|
73 |
+
"Select type of research paper:",
|
74 |
+
[
|
75 |
+
"Review Based Paper",
|
76 |
+
"Opinion/Perspective Based Paper",
|
77 |
+
"Empirical Research Paper",
|
78 |
+
"Research Paper (Other)",
|
79 |
+
],
|
80 |
+
)
|
81 |
+
|
82 |
+
# Let user enter the keyword to filter
|
83 |
+
keyword_input = st.text_input(
|
84 |
+
"Enter the exact keyword to filter papers by 'Keywords' field:"
|
85 |
+
)
|
86 |
+
|
87 |
+
# When user clicks button, use the collection for the selected paper type
|
88 |
+
if st.button("Export Filtered Papers to CSV"):
|
89 |
+
with st.spinner("Exporting filtered documents..."):
|
90 |
+
try:
|
91 |
+
# Determine dynamic collection based on paper type
|
92 |
+
collection_name = paper_type.replace(" ", "_").lower()
|
93 |
+
doc_collection = db[collection_name]
|
94 |
+
|
95 |
+
# Split keywords by commas and strip whitespace
|
96 |
+
keywords_list = [
|
97 |
+
kw.strip() for kw in keyword_input.split(",") if kw.strip()
|
98 |
+
]
|
99 |
+
|
100 |
+
if not keywords_list:
|
101 |
+
st.warning("Please enter at least one keyword.")
|
102 |
+
else:
|
103 |
+
df, csv_filename = filter_and_export_collection_to_csv(
|
104 |
+
keywords_list, doc_collection
|
105 |
+
)
|
106 |
+
if not df.empty and csv_filename:
|
107 |
+
st.success(
|
108 |
+
f"Successfully exported filtered papers to {csv_filename}!"
|
109 |
+
)
|
110 |
+
st.download_button(
|
111 |
+
label="Download CSV",
|
112 |
+
data=df.to_csv(index=False).encode("utf-8"),
|
113 |
+
file_name=csv_filename,
|
114 |
+
mime="text/csv",
|
115 |
+
)
|
116 |
+
st.write("Preview of the filtered DataFrame:")
|
117 |
+
st.dataframe(df)
|
118 |
+
else:
|
119 |
+
st.warning(
|
120 |
+
"No matching documents found for the provided keyword(s)."
|
121 |
+
)
|
122 |
+
except Exception as e:
|
123 |
+
st.error(f"Error exporting filtered papers: {str(e)}")
|
124 |
+
|
125 |
+
|
126 |
+
if __name__ == "__main__":
|
127 |
+
main()
|
new_research_paper.py
ADDED
@@ -0,0 +1,103 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import requests
|
4 |
+
import json
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
# Load environment variables
|
9 |
+
load_dotenv()
|
10 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
11 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
12 |
+
|
13 |
+
|
14 |
+
def call_perplexity_api(prompt: str) -> str:
|
15 |
+
"""Call Perplexity AI with a prompt, return the text response if successful."""
|
16 |
+
headers = {
|
17 |
+
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
18 |
+
"Content-Type": "application/json",
|
19 |
+
}
|
20 |
+
|
21 |
+
payload = {
|
22 |
+
"model": "llama-3.1-sonar-small-128k-chat",
|
23 |
+
"messages": [{"role": "user", "content": prompt}],
|
24 |
+
"temperature": 0.3,
|
25 |
+
}
|
26 |
+
|
27 |
+
try:
|
28 |
+
response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
|
29 |
+
response.raise_for_status()
|
30 |
+
return response.json()["choices"][0]["message"]["content"]
|
31 |
+
except Exception as e:
|
32 |
+
st.error(f"API Error: {str(e)}")
|
33 |
+
return ""
|
34 |
+
|
35 |
+
|
36 |
+
def generate_research_paper(df: pd.DataFrame) -> dict:
|
37 |
+
"""
|
38 |
+
For each column in the DataFrame, generate a research paper section (roughly 100-150 words, matching the prompt below)
|
39 |
+
that addresses the data in that column. Return a dict mapping column -> text.
|
40 |
+
"""
|
41 |
+
paper_sections = {}
|
42 |
+
for col in df.columns:
|
43 |
+
# Convert all non-null rows in the column to strings and join them for context
|
44 |
+
col_values = df[col].dropna().astype(str).tolist()
|
45 |
+
# We'll truncate if this is huge
|
46 |
+
sample_text = " | ".join(col_values[:50]) # limit to first 50 rows for brevity
|
47 |
+
prompt = f"""
|
48 |
+
Topic: {col}
|
49 |
+
Data Sample: {sample_text}
|
50 |
+
|
51 |
+
Generate a professional research paper section for the above column.
|
52 |
+
The section should be at least 100 words and at most 150 words,
|
53 |
+
focusing on key insights, challenges, and potential research angles.
|
54 |
+
Integrate the data samples as context for the content.
|
55 |
+
"""
|
56 |
+
section_text = call_perplexity_api(prompt)
|
57 |
+
paper_sections[col] = section_text.strip() if section_text else ""
|
58 |
+
return paper_sections
|
59 |
+
|
60 |
+
|
61 |
+
def format_paper(paper_dict: dict) -> str:
|
62 |
+
"""
|
63 |
+
Format the generated paper into a Markdown string.
|
64 |
+
Each column name is used as a heading, and the text is placed under it.
|
65 |
+
"""
|
66 |
+
md_text = "# Generated Research Paper\n\n"
|
67 |
+
for col, content in paper_dict.items():
|
68 |
+
md_text += f"## {col}\n{content}\n\n"
|
69 |
+
return md_text
|
70 |
+
|
71 |
+
|
72 |
+
def main():
|
73 |
+
st.title("Corpus-based Research Paper Generator")
|
74 |
+
|
75 |
+
uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
|
76 |
+
if uploaded_file:
|
77 |
+
df = pd.read_csv(uploaded_file)
|
78 |
+
st.write("### Preview of Uploaded Data")
|
79 |
+
st.dataframe(df.head())
|
80 |
+
|
81 |
+
if st.button("Generate Research Paper"):
|
82 |
+
st.info("Generating paper based on the columns of your corpus...")
|
83 |
+
with st.spinner("Calling Perplexity AI..."):
|
84 |
+
paper = generate_research_paper(df)
|
85 |
+
if paper:
|
86 |
+
formatted_paper = format_paper(paper)
|
87 |
+
st.success("Research Paper Generated Successfully!")
|
88 |
+
st.write(formatted_paper)
|
89 |
+
|
90 |
+
st.download_button(
|
91 |
+
label="Download Paper as Markdown",
|
92 |
+
data=formatted_paper,
|
93 |
+
file_name="research_paper.md",
|
94 |
+
mime="text/markdown",
|
95 |
+
)
|
96 |
+
else:
|
97 |
+
st.error(
|
98 |
+
"Paper generation failed. Please check Perplexity API key."
|
99 |
+
)
|
100 |
+
|
101 |
+
|
102 |
+
if __name__ == "__main__":
|
103 |
+
main()
|
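A minimal offline sketch (editorial, not part of the commit) of how generate_research_paper and format_paper compose; it stubs out call_perplexity_api so no PERPLEXITY_API_KEY or network access is needed, and the column names are made up:

import pandas as pd
import new_research_paper as nrp

# Stub the API call so the example runs offline (assumption: we only want to see the plumbing).
nrp.call_perplexity_api = lambda prompt: "Placeholder section text."

df = pd.DataFrame({"Title": ["Paper A", "Paper B"], "Abstract": ["...", "..."]})
sections = nrp.generate_research_paper(df)   # one generated section per column
print(nrp.format_paper(sections))            # Markdown with '## Title' and '## Abstract' headings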
poll_db_operations.py
ADDED
@@ -0,0 +1,70 @@
from pymongo import MongoClient
from datetime import datetime
from bson import ObjectId
from dotenv import load_dotenv
import os

load_dotenv()
MONGO_URI = os.getenv('MONGO_URI')

class PollDatabase:
    def __init__(self):
        self.client = MongoClient(MONGO_URI)
        self.db = self.client["novascholar_db"]

    def create_poll(self, course_id, session_id, question, options, faculty_id):
        """Create a new poll"""
        poll = {
            "course_id": course_id,
            "session_id": session_id,
            "faculty_id": faculty_id,
            "question": question,
            "options": options,
            "status": "active",
            "created_at": datetime.now(),
            "responses": {option: 0 for option in options}
        }
        return self.db.polls.insert_one(poll)

    def get_active_polls(self, session_id):
        """Get all active polls for a session"""
        return list(self.db.polls.find({
            "session_id": session_id,
            "status": "active"
        }))

    def submit_response(self, poll_id, student_id, selected_option):
        """Submit a student's response to a poll"""
        try:
            # Record individual response
            response = {
                "poll_id": poll_id,
                "student_id": student_id,
                "selected_option": selected_option,
                "submitted_at": datetime.now()
            }
            self.db.poll_responses.insert_one(response)

            # Update aggregated results
            self.db.polls.update_one(
                {"_id": ObjectId(poll_id)},
                {"$inc": {f"responses.{selected_option}": 1}}
            )
            return True, "Vote recorded successfully"

        except Exception as e:
            if "duplicate key error" in str(e):
                return False, "You have already voted in this poll"
            return False, f"Error recording vote: {str(e)}"

    def close_poll(self, poll_id):
        """Close a poll"""
        return self.db.polls.update_one(
            {"_id": ObjectId(poll_id)},
            {"$set": {"status": "closed"}}
        )

    def get_poll_analytics(self, poll_id):
        """Get detailed analytics for a poll"""
        poll = self.db.polls.find_one({"_id": ObjectId(poll_id)})
        responses = self.db.poll_responses.find({"poll_id": ObjectId(poll_id)})
        return poll, list(responses)
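A short usage sketch for PollDatabase (editorial, not part of the commit); the course, session, and user IDs are invented, and it assumes MONGO_URI points at a reachable MongoDB instance with the indexes from poll_db_setup.py already created:

from poll_db_operations import PollDatabase

db = PollDatabase()
result = db.create_poll("CS101", "session_1", "Which topic should we revisit?",
                        ["Recursion", "Dynamic programming"], faculty_id="F001")
poll_id = str(result.inserted_id)

ok, message = db.submit_response(poll_id, student_id="S042", selected_option="Recursion")
print(ok, message)  # a second vote by S042 on this poll would trip the unique index and be rejected

poll, responses = db.get_poll_analytics(poll_id)
print(poll["responses"])
db.close_poll(poll_id)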
poll_db_setup.py
ADDED
@@ -0,0 +1,35 @@
from pymongo import MongoClient
from datetime import datetime
from dotenv import load_dotenv
import os

load_dotenv()
MONGO_URI = os.getenv('MONGO_URI')

def setup_mongodb():
    """Initialize MongoDB connection and create collections with indexes"""
    client = MongoClient(MONGO_URI)
    db = client["novascholar_db"]

    # Create indexes for polls collection
    db.polls.create_index([("session_id", 1), ("status", 1)])
    db.polls.create_index([("course_id", 1)])

    # Create unique index for poll_responses to prevent duplicate votes
    db.poll_responses.create_index(
        [("poll_id", 1), ("student_id", 1)],
        unique=True
    )

    return "Database setup completed successfully"

def print_all_polls():
    """Print all polls in the database"""
    client = MongoClient(MONGO_URI)
    db = client["novascholar_db"]

    polls = db.polls.find()
    for poll in polls:
        print(poll)

if __name__ == "__main__":
    print_all_polls()
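Editorial note (not part of the commit): the unique compound index on (poll_id, student_id) is what makes the duplicate-vote branch in PollDatabase.submit_response fire, so setup_mongodb() is meant to run once before polls go live; a minimal sketch, assuming MONGO_URI is set in .env:

from poll_db_setup import setup_mongodb

print(setup_mongodb())  # "Database setup completed successfully"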
pre_class_analytics2.py
ADDED
@@ -0,0 +1,759 @@
import json
import typing_extensions as typing
import google.generativeai as genai
from typing import List, Dict, Any
import numpy as np
from collections import defaultdict

from dotenv import load_dotenv
import os
import pymongo
from pymongo import MongoClient

load_dotenv()
GEMINI_API_KEY = os.getenv('GEMINI_KEY')

class EngagementMetrics(typing.TypedDict):
    participation_level: str  # "high" | "medium" | "low"
    question_quality: str  # "advanced" | "intermediate" | "basic"
    concept_understanding: str  # "strong" | "moderate" | "needs_improvement"

class StudentInsight(typing.TypedDict):
    student_id: str
    performance_level: str  # "high_performer" | "average" | "at_risk"
    struggling_topics: list[str]
    engagement_metrics: EngagementMetrics

class TopicInsight(typing.TypedDict):
    topic: str
    difficulty_level: float  # 0 to 1
    student_count: int
    common_issues: list[str]
    key_misconceptions: list[str]

class RecommendedAction(typing.TypedDict):
    action: str
    priority: str  # "high" | "medium" | "low"
    target_group: str  # "all_students" | "specific_students" | "faculty"
    reasoning: str
    expected_impact: str

class ClassDistribution(typing.TypedDict):
    high_performers: float
    average_performers: float
    at_risk: float

class CourseHealth(typing.TypedDict):
    overall_engagement: float  # 0 to 1
    critical_topics: list[str]
    class_distribution: ClassDistribution

class InterventionMetrics(typing.TypedDict):
    immediate_attention_needed: list[str]  # student_ids
    monitoring_required: list[str]  # student_ids

class AnalyticsResponse(typing.TypedDict):
    topic_insights: list[TopicInsight]
    student_insights: list[StudentInsight]
    recommended_actions: list[RecommendedAction]
    course_health: CourseHealth
    intervention_metrics: InterventionMetrics


class NovaScholarAnalytics:
    def __init__(self, model_name: str = "gemini-1.5-flash"):
        genai.configure(api_key=GEMINI_API_KEY)
        self.model = genai.GenerativeModel(model_name)

    def _create_analytics_prompt(self, chat_histories: List[Dict], all_topics: List[str]) -> str:
        """Creates a structured prompt for Gemini to analyze chat histories."""
        # Prompt 1:
        # return f"""Analyze these student chat histories for a university course and provide detailed analytics.
        #
        # Context:
        # - These are pre-class chat interactions between students and an AI tutor
        # - Topics covered: {', '.join(all_topics)}
        #
        # Chat histories: {json.dumps(chat_histories, indent=2)}
        #
        # Return the analysis in JSON format matching this exact schema:
        # {AnalyticsResponse.__annotations__}
        #
        # Ensure all numeric values are between 0 and 1 (accuracy upto 3 decimal places) where applicable.
        #
        # Important analysis guidelines:
        # 1. Identify topics where students show confusion or ask multiple follow-up questions
        # 2. Look for patterns in question types and complexity
        # 3. Analyze response understanding based on follow-up questions
        # 4. Consider both explicit and implicit signs of difficulty
        # 5. Focus on concept relationships and prerequisite understanding"""

        # Prompt 2:
        # return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
        #
        # Context:
        # - Chat histories: {json.dumps(chat_histories, indent=2)}
        # - These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
        # - Topics covered: {', '.join(all_topics)}.
        #
        # Your task is to extract key insights that will help faculty address challenges effectively and enhance learning outcomes.
        #
        # Output Format:
        # 1. Topics where students face significant difficulties:
        #    - Provide a ranked list of topics where the majority of students are struggling, based on the frequency and nature of their questions or misconceptions.
        #    - Include the percentage of students who found each topic challenging.
        #
        # 2. AI-recommended actions for faculty:
        #    - Suggest actionable steps to address the difficulties identified in each critical topic.
        #    - Specify the priority of each action (high, medium, low) based on the urgency and impact.
        #    - Explain the reasoning behind each recommendation and its expected impact on student outcomes.
        #
        # 3. Student-specific analytics (focusing on at-risk students):
        #    - Identify students categorized as "at-risk" based on their engagement levels, question complexity, and recurring struggles.
        #    - For each at-risk student, list their top 3 struggling topics and their engagement metrics (participation level, concept understanding).
        #    - Provide personalized recommendations for improving their understanding.
        #
        # Guidelines for Analysis:
        # - Focus on actionable and concise insights rather than exhaustive details.
        # - Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
        # - Prioritize topics with higher difficulty scores or more students struggling.
        # - Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
        #
        # The response must be well-structured, concise, and highly actionable for faculty to implement improvements effectively."""

        # Prompt 3:
        return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
        Context:
        - Chat histories: {json.dumps(chat_histories, indent=2)}
        - These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
        - Topics covered: {', '.join(all_topics)}.

        Your task is to provide detailed analytics that will help faculty address challenges effectively and enhance learning outcomes.

        Output Format (strictly follow this JSON structure):
        {{
            "topic_wise_insights": [
                {{
                    "topic": "<string>",
                    "struggling_percentage": <number between 0 and 1>,
                    "key_issues": ["<string>", "<string>", ...],
                    "key_misconceptions": ["<string>", "<string>", ...],
                    "recommended_actions": {{
                        "description": "<string>",
                        "priority": "high|medium|low",
                        "expected_outcome": "<string>"
                    }}
                }}
            ],
            "ai_recommended_actions": [
                {{
                    "action": "<string>",
                    "priority": "high|medium|low",
                    "reasoning": "<string>",
                    "expected_outcome": "<string>",
                    "pedagogy_recommendations": {{
                        "methods": ["<string>", "<string>", ...],
                        "resources": ["<string>", "<string>", ...],
                        "expected_impact": "<string>"
                    }}
                }}
            ],
            "student_analytics": [
                {{
                    "student_id": "<string>",
                    "engagement_metrics": {{
                        "participation_level": <number between 0 and 1>,
                        "concept_understanding": "strong|moderate|needs_improvement",
                        "question_quality": "advanced|intermediate|basic"
                    }},
                    "struggling_topics": ["<string>", "<string>", ...],
                    "personalized_recommendation": "<string>"
                }}
            ]
        }}

        Guidelines for Analysis:
        - Focus on actionable and concise insights rather than exhaustive details.
        - Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
        - Prioritize topics with higher difficulty scores or more students struggling.
        - Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
        - Make sure to include All** students in the analysis, not just a subset.
        - for the ai_recommended_actions:
          - Prioritize pedagogy recommendations for critical topics with the high difficulty scores or struggling percentages.
          - For each action:
            - Include specific teaching methods (e.g., interactive discussions or quizzes, problem-based learning, practical examples etc).
            - Recommend supporting resources (e.g., videos, handouts, simulations).
            - Provide reasoning for the recommendation and the expected outcomes for student learning.
          - Example:
            - **Action:** Conduct an interactive problem-solving session on "<Topic Name>".
            - **Reasoning:** Students showed difficulty in applying concepts to practical problems.
            - **Expected Outcome:** Improved practical understanding and application of the topic.
            - **Pedagogy Recommendations:**
              - **Methods:** Group discussions, real-world case studies.
              - **Resources:** Online interactive tools, relevant case studies, video walkthroughs.
              - **Expected Impact:** Enhance conceptual clarity by 40% and practical application by 30%.

        The response must adhere strictly to the above JSON structure, with all fields populated appropriately."""

    def _calculate_class_distribution(self, analytics: Dict) -> Dict:
        """Calculate the distribution of students across performance levels."""
        try:
            total_students = len(analytics.get("student_insights", []))
            if total_students == 0:
                return {
                    "high_performers": 0,
                    "average_performers": 0,
                    "at_risk": 0
                }

            distribution = defaultdict(int)

            for student in analytics.get("student_insights", []):
                performance_level = student.get("performance_level", "average")
                # Map performance levels to our three categories
                if performance_level in ["excellent", "high", "high_performer"]:
                    distribution["high_performers"] += 1
                elif performance_level in ["struggling", "low", "at_risk"]:
                    distribution["at_risk"] += 1
                else:
                    distribution["average_performers"] += 1

            # Convert to percentages
            return {
                level: count/total_students
                for level, count in distribution.items()
            }
        except Exception as e:
            print(f"Error calculating class distribution: {str(e)}")
            return {
                "high_performers": 0,
                "average_performers": 0,
                "at_risk": 0
            }

    def _identify_urgent_cases(self, analytics: Dict) -> List[str]:
        """Identify students needing immediate attention."""
        try:
            urgent_cases = []
            for student in analytics.get("student_insights", []):
                student_id = student.get("student_id")
                if not student_id:
                    continue

                # Check multiple risk factors
                risk_factors = 0

                # Factor 1: Performance level
                if student.get("performance_level") in ["struggling", "at_risk", "low"]:
                    risk_factors += 1

                # Factor 2: Number of struggling topics
                if len(student.get("struggling_topics", [])) >= 2:
                    risk_factors += 1

                # Factor 3: Engagement metrics
                engagement = student.get("engagement_metrics", {})
                if (engagement.get("participation_level") == "low" or
                    engagement.get("concept_understanding") == "needs_improvement"):
                    risk_factors += 1

                # If student has multiple risk factors, add to urgent cases
                if risk_factors >= 2:
                    urgent_cases.append(student_id)

            return urgent_cases
        except Exception as e:
            print(f"Error identifying urgent cases: {str(e)}")
            return []

    def _identify_monitoring_cases(self, analytics: Dict) -> List[str]:
        """Identify students who need monitoring but aren't urgent cases."""
        try:
            monitoring_cases = []
            urgent_cases = set(self._identify_urgent_cases(analytics))

            for student in analytics.get("student_insights", []):
                student_id = student.get("student_id")
                if not student_id or student_id in urgent_cases:
                    continue

                # Check monitoring criteria
                monitoring_needed = False

                # Criterion 1: Has some struggling topics but not enough for urgent
                if len(student.get("struggling_topics", [])) == 1:
                    monitoring_needed = True

                # Criterion 2: Medium-low engagement
                engagement = student.get("engagement_metrics", {})
                if engagement.get("participation_level") == "medium":
                    monitoring_needed = True

                # Criterion 3: Recent performance decline
                if student.get("performance_level") == "average":
                    monitoring_needed = True

                if monitoring_needed:
                    monitoring_cases.append(student_id)

            return monitoring_cases
        except Exception as e:
            print(f"Error identifying monitoring cases: {str(e)}")
            return []

    def _identify_critical_topics(self, analytics: Dict) -> List[str]:
        """
        Identify critical topics that need attention based on multiple factors.
        Returns a list of topic names that are considered critical.
        """
        try:
            critical_topics = []
            topics = analytics.get("topic_insights", [])

            for topic in topics:
                if not isinstance(topic, dict):
                    continue

                # Initialize score for topic criticality
                critical_score = 0

                # Factor 1: High difficulty level
                difficulty_level = topic.get("difficulty_level", 0)
                if difficulty_level > 0.7:
                    critical_score += 2
                elif difficulty_level > 0.5:
                    critical_score += 1

                # Factor 2: Number of students struggling
                student_count = topic.get("student_count", 0)
                total_students = len(analytics.get("student_insights", []))
                if total_students > 0:
                    struggle_ratio = student_count / total_students
                    if struggle_ratio > 0.5:
                        critical_score += 2
                    elif struggle_ratio > 0.3:
                        critical_score += 1

                # Factor 3: Number of common issues
                if len(topic.get("common_issues", [])) > 2:
                    critical_score += 1

                # Factor 4: Number of key misconceptions
                if len(topic.get("key_misconceptions", [])) > 1:
                    critical_score += 1

                # If topic exceeds threshold, mark as critical
                if critical_score >= 3:
                    critical_topics.append(topic.get("topic", "Unknown Topic"))

            return critical_topics

        except Exception as e:
            print(f"Error identifying critical topics: {str(e)}")
            return []

    def _calculate_engagement(self, analytics: Dict) -> Dict:
        """
        Calculate detailed engagement metrics across all students.
        Returns a dictionary with engagement statistics.
        """
        try:
            total_students = len(analytics.get("student_insights", []))
            if total_students == 0:
                return {
                    "total_students": 0,
                    "overall_score": 0,
                    "engagement_distribution": {
                        "high": 0,
                        "medium": 0,
                        "low": 0
                    },
                    "participation_metrics": {
                        "average_topics_per_student": 0,
                        "active_participants": 0
                    }
                }

            engagement_levels = defaultdict(int)
            total_topics_engaged = 0
            active_participants = 0

            for student in analytics.get("student_insights", []):
                # Get engagement metrics
                metrics = student.get("engagement_metrics", {})

                # Calculate participation level
                participation = metrics.get("participation_level", "low").lower()
                engagement_levels[participation] += 1

                # Count topics student is engaged with
                topics_count = len(student.get("struggling_topics", []))
                total_topics_engaged += topics_count

                # Count active participants (students engaging with any topics)
                if topics_count > 0:
                    active_participants += 1

            # Calculate overall engagement score (0-1)
            weighted_score = (
                (engagement_levels["high"] * 1.0 +
                engagement_levels["medium"] * 0.6 +
                engagement_levels["low"] * 0.2) / total_students
            )

            return {
                "total_students": total_students,
                "overall_score": round(weighted_score, 2),
                "engagement_distribution": {
                    level: count/total_students
                    for level, count in engagement_levels.items()
                },
                "participation_metrics": {
                    "average_topics_per_student": round(total_topics_engaged / total_students, 2),
                    "active_participants_ratio": round(active_participants / total_students, 2)
                }
            }

        except Exception as e:
            print(f"Error calculating engagement: {str(e)}")
            return {
                "total_students": 0,
                "overall_score": 0,
                "engagement_distribution": {
                    "high": 0,
                    "medium": 0,
                    "low": 0
                },
                "participation_metrics": {
                    "average_topics_per_student": 0,
                    "active_participants_ratio": 0
                }
            }

    def _process_gemini_response(self, response: str) -> Dict:
        """Process and validate Gemini's response."""
        # try:
        #     analytics = json.loads(response)
        #     return self._enrich_analytics(analytics)
        # except json.JSONDecodeError as e:
        #     print(f"Error decoding Gemini response: {e}")
        #     return self._fallback_analytics()
        try:
            # Parse JSON response
            analytics = json.loads(response)

            # Validate required fields exist
            required_fields = {
                "topic_insights": [],
                "student_insights": [],
                "recommended_actions": []
            }

            # Ensure all required fields exist with default values
            for field, default_value in required_fields.items():
                if field not in analytics or not analytics[field]:
                    analytics[field] = default_value

            # Now enrich the validated analytics
            return self._enrich_analytics(analytics)

        except (json.JSONDecodeError, KeyError, TypeError) as e:
            print(f"Error processing Gemini response: {str(e)}")
            print(f"Raw response: {response}")
            return self._fallback_analytics()

    def _enrich_analytics(self, analytics: Dict) -> Dict:
        """Add derived insights and metrics to the analytics."""
        # Add overall course health metrics
        analytics["course_health"] = {
            "overall_engagement": self._calculate_engagement(analytics),
            "critical_topics": self._identify_critical_topics(analytics),
            "class_distribution": self._calculate_class_distribution(analytics)
        }

        # Add intervention urgency scores
        analytics["intervention_metrics"] = {
            "immediate_attention_needed": self._identify_urgent_cases(analytics),
            "monitoring_required": self._identify_monitoring_cases(analytics)
        }

        return analytics

    def _calculate_engagement(self, analytics: Dict) -> Dict:
        # """Calculate overall engagement metrics."""
        # total_students = len(analytics["student_insights"])
        # engagement_levels = defaultdict(int)
        #
        # for student in analytics["student_insights"]:
        #     engagement_levels[student["engagement_metrics"]["participation_level"]] += 1
        #
        # return {
        #     "total_students": total_students,
        #     "engagement_distribution": {
        #         level: count/total_students
        #         for level, count in engagement_levels.items()
        #     }
        # }
        """Calculate overall engagement metrics with defensive programming."""
        try:
            total_students = len(analytics.get("student_insights", []))
            if total_students == 0:
                return {
                    "total_students": 0,
                    "engagement_distribution": {
                        "high": 0,
                        "medium": 0,
                        "low": 0
                    }
                }

            engagement_levels = defaultdict(int)

            for student in analytics.get("student_insights", []):
                metrics = student.get("engagement_metrics", {})
                level = metrics.get("participation_level", "low")
                engagement_levels[level] += 1

            return {
                "total_students": total_students,
                "engagement_distribution": {
                    level: count/total_students
                    for level, count in engagement_levels.items()
                }
            }
        except Exception as e:
            print(f"Error calculating engagement: {str(e)}")
            return {
                "total_students": 0,
                "engagement_distribution": {
                    "high": 0,
                    "medium": 0,
                    "low": 0
                }
            }

    def _identify_critical_topics(self, analytics: Dict) -> List[Dict]:
        # """Identify topics needing immediate attention."""
        # return [
        #     topic for topic in analytics["topic_insights"]
        #     if topic["difficulty_level"] > 0.7 or
        #     len(topic["common_issues"]) > 2
        # ]
        """Identify topics needing immediate attention with defensive programming."""
        try:
            return [
                topic for topic in analytics.get("topic_insights", [])
                if topic.get("difficulty_level", 0) > 0.7 or
                len(topic.get("common_issues", [])) > 2
            ]
        except Exception as e:
            print(f"Error identifying critical topics: {str(e)}")
            return []

    def generate_analytics(self, chat_histories: List[Dict], all_topics: List[str]) -> Dict:
        # Method 1: (caused key 'student_insights' error):
        # """Main method to generate analytics from chat histories."""
        # # Preprocess chat histories
        # processed_histories = self._preprocess_chat_histories(chat_histories)
        #
        # # Create and send prompt to Gemini
        # prompt = self._create_analytics_prompt(processed_histories, all_topics)
        # response = self.model.generate_content(
        #     prompt,
        #     generation_config=genai.GenerationConfig(
        #         response_mime_type="application/json",
        #         response_schema=AnalyticsResponse
        #     )
        # )
        #
        # # # Process and enrich analytics
        # # analytics = self._process_gemini_response(response.text)
        # # return analytics
        # # Process, validate, and enrich the response
        # analytics = self._process_gemini_response(response.text)
        #
        # # Then cast it to satisfy the type checker
        # return typing.cast(AnalyticsResponse, analytics)

        # Method 2 (possible fix):
        # """Main method to generate analytics with better error handling."""
        # try:
        #     processed_histories = self._preprocess_chat_histories(chat_histories)
        #     prompt = self._create_analytics_prompt(processed_histories, all_topics)
        #
        #     response = self.model.generate_content(
        #         prompt,
        #         generation_config=genai.GenerationConfig(
        #             response_mime_type="application/json",
        #             temperature=0.15
        #             # response_schema=AnalyticsResponse
        #         )
        #     )
        #
        #     if not response.text:
        #         print("Empty response from Gemini")
        #         return self._fallback_analytics()
        #
        #     # analytics = self._process_gemini_response(response.text)
        #     # return typing.cast(AnalyticsResponse, analytics)
        #     # return response.text;
        #     analytics = json.loads(response.text)
        #     return analytics
        #
        # except Exception as e:
        #     print(f"Error generating analytics: {str(e)}")
        #     return self._fallback_analytics()

        # Debugging code:
        """Main method to generate analytics with better error handling."""
        try:
            # Debug print for input validation
            print("Input validation:")
            print(f"Chat histories: {len(chat_histories)} entries")
            print(f"Topics: {all_topics}")

            if not chat_histories or not all_topics:
                print("Missing required input data")
                return self._fallback_analytics()

            # Debug the preprocessing step
            try:
                processed_histories = self._preprocess_chat_histories(chat_histories)
                print("Successfully preprocessed chat histories")
            except Exception as preprocess_error:
                print(f"Error in preprocessing: {str(preprocess_error)}")
                return self._fallback_analytics()

            # Debug the prompt creation
            try:
                prompt = self._create_analytics_prompt(processed_histories, all_topics)
                print("Successfully created prompt")
                print("Prompt preview:", prompt[:200] + "...")  # Print first 200 chars
            except Exception as prompt_error:
                print(f"Error in prompt creation: {str(prompt_error)}")
                return self._fallback_analytics()

            # Rest of the function remains the same
            response = self.model.generate_content(
                prompt,
                generation_config=genai.GenerationConfig(
                    response_mime_type="application/json",
                    temperature=0.15
                )
            )

            if not response.text:
                print("Empty response from Gemini")
                return self._fallback_analytics()

            analytics = json.loads(response.text)
            return analytics

        except Exception as e:
            print(f"Error generating analytics: {str(e)}")
            print(f"Error type: {type(e)}")
            import traceback
            print("Full traceback:", traceback.format_exc())
            return self._fallback_analytics()

    def _preprocess_chat_histories(self, chat_histories: List[Dict]) -> List[Dict]:
        # """Preprocess chat histories to focus on relevant information."""
        # processed = []
        #
        # for chat in chat_histories:
        #     print(str(chat["user_id"]))
        #     processed_chat = {
        #         "user_id": str(chat["user_id"]),
        #         "messages": [
        #             {
        #                 "prompt": msg["prompt"],
        #                 "response": msg["response"]
        #             }
        #             for msg in chat["messages"]
        #         ]
        #     }
        #     processed.append(processed_chat)
        #
        # return processed

        # Code 2:
        """Preprocess chat histories to focus on relevant information."""
        processed = []

        for chat in chat_histories:
            # Convert ObjectId to string if it's an ObjectId
            user_id = str(chat["user_id"]["$oid"]) if isinstance(chat["user_id"], dict) and "$oid" in chat["user_id"] else str(chat["user_id"])

            try:
                processed_chat = {
                    "user_id": user_id,
                    "messages": [
                        {
                            "prompt": msg["prompt"],
                            "response": msg["response"]
                        }
                        for msg in chat["messages"]
                    ]
                }
                processed.append(processed_chat)
                print(f"Successfully processed chat for user: {user_id}")
            except Exception as e:
                print(f"Error processing chat for user: {user_id}")
                print(f"Error details: {str(e)}")
                continue

        return processed

    def _fallback_analytics(self) -> Dict:
        # """Provide basic analytics in case of LLM processing failure."""
        # return {
        #     "topic_insights": [],
        #     "student_insights": [],
        #     "recommended_actions": [
        #         {
        #             "action": "Review analytics generation process",
        #             "priority": "high",
        #             "target_group": "system_administrators",
        #             "reasoning": "Analytics generation failed",
        #             "expected_impact": "Restore analytics functionality"
        #         }
        #     ]
        # }
        """Provide comprehensive fallback analytics that match our schema."""
        return {
            "topic_insights": [],
            "student_insights": [],
            "recommended_actions": [
                {
                    "action": "Review analytics generation process",
                    "priority": "high",
                    "target_group": "system_administrators",
                    "reasoning": "Analytics generation failed",
                    "expected_impact": "Restore analytics functionality"
                }
            ],
            "course_health": {
                "overall_engagement": 0,
                "critical_topics": [],
                "class_distribution": {
                    "high_performers": 0,
                    "average_performers": 0,
                    "at_risk": 0
                }
            },
            "intervention_metrics": {
                "immediate_attention_needed": [],
                "monitoring_required": []
            }
        }

# if __name__ == "__main__":
#     # Example usage
#     analytics_generator = NovaScholarAnalytics()
#     analytics = analytics_generator.generate_analytics(chat_histories, all_topics)
#     print(json.dumps(analytics, indent=2))
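A minimal invocation sketch for NovaScholarAnalytics (editorial, not part of the commit), mirroring the commented-out __main__ block above; the chat history and topic names are invented and a valid GEMINI_KEY in .env is assumed:

import json
from pre_class_analytics2 import NovaScholarAnalytics

# Hypothetical pre-class chat data in the shape _preprocess_chat_histories expects.
chat_histories = [
    {
        "user_id": {"$oid": "64f0c0ffee0000000000a001"},
        "messages": [
            {"prompt": "Why does gradient descent need a learning rate?",
             "response": "It scales each update step..."},
        ],
    }
]
all_topics = ["Gradient Descent", "Regularization"]

analytics = NovaScholarAnalytics().generate_analytics(chat_histories, all_topics)
print(json.dumps(analytics, indent=2))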
pre_class_analytics4.py
ADDED
@@ -0,0 +1,592 @@
import pandas as pd
import numpy as np
from datetime import datetime
from typing import List, Dict, Any, Tuple
import spacy
from collections import Counter, defaultdict
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from textblob import TextBlob
import networkx as nx
from scipy import stats
import logging
import json
from dataclasses import dataclass
from enum import Enum

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class TopicDifficulty(Enum):
    EASY = "easy"
    MODERATE = "moderate"
    DIFFICULT = "difficult"
    VERY_DIFFICULT = "very_difficult"


@dataclass
class QuestionMetrics:
    complexity_score: float
    follow_up_count: int
    clarification_count: int
    time_spent: float
    sentiment_score: float

@dataclass
class TopicInsights:
    difficulty_level: TopicDifficulty
    common_confusion_points: List[str]
    question_patterns: List[str]
    time_distribution: Dict[str, float]
    engagement_metrics: Dict[str, float]
    recommended_focus_areas: List[str]

    def to_dict(self):
        return {
            "difficulty_level": self.difficulty_level.value,  # Convert enum to its value
            "common_confusion_points": self.common_confusion_points,
            "question_patterns": self.question_patterns,
            "time_distribution": {str(k): v for k, v in self.time_distribution.items()},
            "engagement_metrics": self.engagement_metrics,
            "recommended_focus_areas": self.recommended_focus_areas,
        }

class PreClassAnalytics:
    def __init__(self, nlp_model: str = "en_core_web_lg"):
        """Initialize the analytics system with necessary components."""
        self.nlp = spacy.load(nlp_model)
        self.question_indicators = {
            "what", "why", "how", "when", "where", "which", "who",
            "whose", "whom", "can", "could", "would", "will", "explain"
        }
        self.confusion_indicators = {
            "confused", "don't understand", "unclear", "not clear",
            "stuck", "difficult", "hard", "help", "explain again"
        }
        self.follow_up_indicators = {
            "also", "another", "additionally", "furthermore", "moreover",
            "besides", "related", "similarly", "again"
        }

    def preprocess_chat_history(self, chat_history: List[Dict]) -> pd.DataFrame:
        """Convert chat history to DataFrame with enhanced features."""
        messages = []
        for chat in chat_history:
            user_id = chat['user_id']['$oid']
            for msg in chat['messages']:
                try:
                    # Ensure the timestamp is in the correct format
                    if isinstance(msg['timestamp'], dict) and '$date' in msg['timestamp']:
                        timestamp = pd.to_datetime(msg['timestamp']['$date'])
                    elif isinstance(msg['timestamp'], str):
                        timestamp = pd.to_datetime(msg['timestamp'])
                    else:
                        raise ValueError("Invalid timestamp format")
                except Exception as e:
                    print(f"Error parsing timestamp: {msg['timestamp']}, error: {e}")
                    timestamp = pd.NaT  # Use NaT (Not a Time) for invalid timestamps

                messages.append({
                    'user_id': user_id,
                    'timestamp': timestamp,
                    'prompt': msg['prompt'],
                    'response': msg['response'],
                    'is_question': any(q in msg['prompt'].lower() for q in self.question_indicators),
                    'shows_confusion': any(c in msg['prompt'].lower() for c in self.confusion_indicators),
                    'is_followup': any(f in msg['prompt'].lower() for f in self.follow_up_indicators)
                })

        df = pd.DataFrame(messages)
        df['sentiment'] = df['prompt'].apply(lambda x: TextBlob(x).sentiment.polarity)
        return df

    def extract_topic_hierarchies(self, df: pd.DataFrame) -> Dict[str, List[str]]:
        """Extract hierarchical topic relationships from conversations."""
        topic_hierarchy = defaultdict(list)

        for _, row in df.iterrows():
            doc = self.nlp(row['prompt'])

            # Extract main topics and subtopics using noun chunks and dependencies
            main_topics = []
            subtopics = []

            for chunk in doc.noun_chunks:
                if chunk.root.dep_ in ('nsubj', 'dobj'):
                    main_topics.append(chunk.text.lower())
                else:
                    subtopics.append(chunk.text.lower())

            # Build hierarchy
            for main_topic in main_topics:
                topic_hierarchy[main_topic].extend(subtopics)

        # Clean and deduplicate
        return {k: list(set(v)) for k, v in topic_hierarchy.items()}

    def analyze_topic_difficulty(self, df: pd.DataFrame, topic: str) -> TopicDifficulty:
        """Determine topic difficulty based on various metrics."""
        topic_msgs = df[df['prompt'].str.contains(topic, case=False)]

        # Calculate difficulty indicators
        confusion_rate = topic_msgs['shows_confusion'].mean()
        question_rate = topic_msgs['is_question'].mean()
        follow_up_rate = topic_msgs['is_followup'].mean()
        avg_sentiment = topic_msgs['sentiment'].mean()

        # Calculate composite difficulty score
        difficulty_score = (
            confusion_rate * 0.4 +
            question_rate * 0.3 +
            follow_up_rate * 0.2 +
            (1 - (avg_sentiment + 1) / 2) * 0.1
        )

        # Map score to difficulty level
        if difficulty_score < 0.3:
            return TopicDifficulty.EASY
        elif difficulty_score < 0.5:
            return TopicDifficulty.MODERATE
        elif difficulty_score < 0.7:
            return TopicDifficulty.DIFFICULT
        else:
            return TopicDifficulty.VERY_DIFFICULT

    def identify_confusion_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
        """Identify common patterns in student confusion."""
        confused_msgs = df[
            (df['prompt'].str.contains(topic, case=False)) &
            (df['shows_confusion'])
        ]['prompt']

        patterns = []
        for msg in confused_msgs:
            doc = self.nlp(msg)

            # Extract key phrases around confusion indicators
            for sent in doc.sents:
                for token in sent:
                    if token.text.lower() in self.confusion_indicators:
                        # Get context window around confusion indicator
                        context = sent.text
                        patterns.append(context)

        # Group similar patterns
        if patterns:
            vectorizer = TfidfVectorizer(ngram_range=(1, 3))
            tfidf_matrix = vectorizer.fit_transform(patterns)
            similarity_matrix = cosine_similarity(tfidf_matrix)

            # Cluster similar patterns
            G = nx.Graph()
            for i in range(len(patterns)):
                for j in range(i + 1, len(patterns)):
                    if similarity_matrix[i][j] > 0.5:  # Similarity threshold
                        G.add_edge(i, j)

            # Extract representative patterns from each cluster
            clusters = list(nx.connected_components(G))
            return [patterns[min(cluster)] for cluster in clusters]

        return []

    def analyze_question_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
        """Analyze patterns in student questions about the topic."""
        topic_questions = df[
            (df['prompt'].str.contains(topic, case=False)) &
            (df['is_question'])
        ]['prompt']

        question_types = defaultdict(list)
        for question in topic_questions:
            doc = self.nlp(question)

            # Categorize questions
            if any(token.text.lower() in {"what", "define", "explain"} for token in doc):
                question_types["conceptual"].append(question)
            elif any(token.text.lower() in {"how", "steps", "process"} for token in doc):
                question_types["procedural"].append(question)
            elif any(token.text.lower() in {"why", "reason", "because"} for token in doc):
                question_types["reasoning"].append(question)
            else:
                question_types["other"].append(question)

        # Extract patterns from each category
        patterns = []
        for category, questions in question_types.items():
            if questions:
                vectorizer = TfidfVectorizer(ngram_range=(1, 3))
                tfidf_matrix = vectorizer.fit_transform(questions)

                # Get most representative questions
                feature_array = np.mean(tfidf_matrix.toarray(), axis=0)
                tfidf_sorting = np.argsort(feature_array)[::-1]
                features = vectorizer.get_feature_names_out()

                patterns.append(f"{category}: {' '.join(features[tfidf_sorting[:3]])}")

        return patterns

    def analyze_time_distribution(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
        """Analyze time spent on different aspects of the topic."""
        topic_msgs = df[df['prompt'].str.contains(topic, case=False)].copy()
        if len(topic_msgs) < 2:
            return {}

        topic_msgs['time_diff'] = topic_msgs['timestamp'].diff()

        # Calculate time distribution
        distribution = {
            'total_time': topic_msgs['time_diff'].sum().total_seconds() / 60,
            'avg_time_per_message': topic_msgs['time_diff'].mean().total_seconds() / 60,
            'max_time_gap': topic_msgs['time_diff'].max().total_seconds() / 60,
            'time_spent_on_questions': topic_msgs[topic_msgs['is_question']]['time_diff'].sum().total_seconds() / 60,
            'time_spent_on_confusion': topic_msgs[topic_msgs['shows_confusion']]['time_diff'].sum().total_seconds() / 60
        }

        return distribution

    def calculate_engagement_metrics(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
        """Calculate student engagement metrics for the topic."""
        topic_msgs = df[df['prompt'].str.contains(topic, case=False)]

        metrics = {
            'message_count': len(topic_msgs),
            'question_ratio': topic_msgs['is_question'].mean(),
            'confusion_ratio': topic_msgs['shows_confusion'].mean(),
            'follow_up_ratio': topic_msgs['is_followup'].mean(),
            'avg_sentiment': topic_msgs['sentiment'].mean(),
            'engagement_score': 0.0  # Will be calculated below
        }

        # Calculate engagement score
        metrics['engagement_score'] = (
            metrics['message_count'] * 0.3 +
            metrics['question_ratio'] * 0.25 +
            metrics['follow_up_ratio'] * 0.25 +
            (metrics['avg_sentiment'] + 1) / 2 * 0.2  # Normalize sentiment to 0-1
        )

        return metrics

    def generate_topic_insights(self, df: pd.DataFrame, topic: str) -> TopicInsights:
        """Generate comprehensive insights for a topic."""
        difficulty = self.analyze_topic_difficulty(df, topic)
        confusion_points = self.identify_confusion_patterns(df, topic)
        question_patterns = self.analyze_question_patterns(df, topic)
        time_distribution = self.analyze_time_distribution(df, topic)
        engagement_metrics = self.calculate_engagement_metrics(df, topic)

        # Generate recommended focus areas based on insights
        focus_areas = []

        if difficulty in (TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT):
            focus_areas.append("Fundamental concept reinforcement needed")

        if confusion_points:
            focus_areas.append(f"Address common confusion around: {', '.join(confusion_points[:3])}")

        if engagement_metrics['confusion_ratio'] > 0.3:
            focus_areas.append("Consider alternative teaching approaches")

        if time_distribution.get('time_spent_on_questions', 0) > time_distribution.get('total_time', 0) * 0.5:
            focus_areas.append("More practical examples or demonstrations needed")

        return TopicInsights(
            difficulty_level=difficulty,
            common_confusion_points=confusion_points,
            question_patterns=question_patterns,
            time_distribution=time_distribution,
            engagement_metrics=engagement_metrics,
            recommended_focus_areas=focus_areas
        )

    def analyze_student_progress(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Analyze individual student progress and learning patterns."""
        student_progress = {}

        for student_id in df['user_id'].unique():
            student_msgs = df[df['user_id'] == student_id]

            # Calculate student-specific metrics
            progress = {
                'total_messages': len(student_msgs),
                'questions_asked': student_msgs['is_question'].sum(),
                'confusion_instances': student_msgs['shows_confusion'].sum(),
                'avg_sentiment': student_msgs['sentiment'].mean(),
                'topic_engagement': {},
                'learning_pattern': self._identify_learning_pattern(student_msgs)
            }

            # Analyze topic-specific engagement
            topics = self.extract_topic_hierarchies(student_msgs)
            for topic in topics:
                topic_msgs = student_msgs[student_msgs['prompt'].str.contains(topic, case=False)]
                progress['topic_engagement'][topic] = {
                    'message_count': len(topic_msgs),
                    'confusion_rate': topic_msgs['shows_confusion'].mean(),
                    'sentiment_trend': stats.linregress(
                        range(len(topic_msgs)),
                        topic_msgs['sentiment']
                    ).slope
                }

            student_progress[student_id] = progress

        return student_progress

    def _identify_learning_pattern(self, student_msgs: pd.DataFrame) -> str:
        """Identify student's learning pattern based on their interaction style."""
        # Calculate key metrics
        question_ratio = student_msgs['is_question'].mean()
        confusion_ratio = student_msgs['shows_confusion'].mean()
        follow_up_ratio = student_msgs['is_followup'].mean()
        sentiment_trend = stats.linregress(
            range(len(student_msgs)),
            student_msgs['sentiment']
        ).slope

        # Identify pattern
        if question_ratio > 0.6:
            return "Inquisitive Learner"
        elif confusion_ratio > 0.4:
            return "Needs Additional Support"
        elif follow_up_ratio > 0.5:
            return "Deep Dive Learner"
        elif sentiment_trend > 0:
            return "Progressive Learner"
        else:
            return "Steady Learner"

    def generate_comprehensive_report(self, chat_history: List[Dict]) -> Dict[str, Any]:
        """Generate a comprehensive analytics report."""
        # Preprocess chat history
        df = self.preprocess_chat_history(chat_history)

        # Extract topics
        topics = self.extract_topic_hierarchies(df)

        report = {
            'topics': {},
            'student_progress': self.analyze_student_progress(df),
            'overall_metrics': {
                'total_conversations': len(df),
                'unique_students': df['user_id'].nunique(),
                'avg_sentiment': df['sentiment'].mean(),
                'most_discussed_topics': Counter(
                    topic for topics_list in topics.values()
                    for topic in topics_list
                ).most_common(5)
            }
        }

        # Generate topic-specific insights
        for main_topic, subtopics in topics.items():
            subtopic_insights = {}
            for subtopic in subtopics:
                subtopic_insights[subtopic] = {
                    'insights': self.generate_topic_insights(df, subtopic),
                    'related_topics': [t for t in subtopics if t != subtopic],
                    'student_engagement': {
                        student_id: self.calculate_engagement_metrics(
                            df[df['user_id'] == student_id],
                            subtopic
                        )
                        for student_id in df['user_id'].unique()
                    }
                }

            report['topics'][main_topic] = {
                'insights': self.generate_topic_insights(df, main_topic),
                'subtopics': subtopic_insights,
                'topic_relationships': {
                    'hierarchy_depth': len(subtopics),
                    'connection_strength': self._calculate_topic_connections(df, main_topic, subtopics),
                    'progression_path': self._identify_topic_progression(df, main_topic, subtopics)
                }
            }

        # Add temporal analysis
        report['temporal_analysis'] = {
            'daily_engagement': df.groupby(df['timestamp'].dt.date).agg({
                'user_id': 'count',
                'is_question': 'sum',
                'shows_confusion': 'sum',
                'sentiment': 'mean'
            }).to_dict(),
            'peak_activity_hours': df.groupby(df['timestamp'].dt.hour)['user_id'].count().nlargest(3).to_dict(),
            'learning_trends': self._analyze_learning_trends(df)
        }

        # Add recommendations
        report['recommendations'] = self._generate_recommendations(report)

        return report

    def _calculate_topic_connections(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> Dict[str, float]:
        """Calculate connection strength between topics based on co-occurrence."""
        connections = {}
        main_topic_msgs = df[df['prompt'].str.contains(main_topic, case=False)]
main_topic_msgs = df[df['prompt'].str.contains(main_topic, case=False)]
|
431 |
+
|
432 |
+
for subtopic in subtopics:
|
433 |
+
cooccurrence = df[
|
434 |
+
df['prompt'].str.contains(main_topic, case=False) &
|
435 |
+
df['prompt'].str.contains(subtopic, case=False)
|
436 |
+
].shape[0]
|
437 |
+
|
438 |
+
connection_strength = cooccurrence / len(main_topic_msgs) if len(main_topic_msgs) > 0 else 0
|
439 |
+
connections[subtopic] = connection_strength
|
440 |
+
|
441 |
+
return connections
|
442 |
+
|
443 |
+
def _identify_topic_progression(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> List[str]:
|
444 |
+
"""Identify optimal topic progression path based on student interactions."""
|
445 |
+
topic_difficulties = {}
|
446 |
+
|
447 |
+
for subtopic in subtopics:
|
448 |
+
difficulty = self.analyze_topic_difficulty(df, subtopic)
|
449 |
+
topic_difficulties[subtopic] = difficulty.value
|
450 |
+
|
451 |
+
# Sort subtopics by difficulty
|
452 |
+
return sorted(subtopics, key=lambda x: topic_difficulties[x])
|
453 |
+
|
454 |
+
def _analyze_learning_trends(self, df: pd.DataFrame) -> Dict[str, Any]:
|
455 |
+
"""Analyze overall learning trends across the dataset."""
|
456 |
+
return {
|
457 |
+
'sentiment_trend': stats.linregress(
|
458 |
+
range(len(df)),
|
459 |
+
df['sentiment']
|
460 |
+
)._asdict(),
|
461 |
+
'confusion_trend': stats.linregress(
|
462 |
+
range(len(df)),
|
463 |
+
df['shows_confusion']
|
464 |
+
)._asdict(),
|
465 |
+
'engagement_progression': self._calculate_engagement_progression(df)
|
466 |
+
}
|
467 |
+
|
468 |
+
def _calculate_engagement_progression(self, df: pd.DataFrame) -> Dict[str, float]:
|
469 |
+
"""Calculate how student engagement changes over time."""
|
470 |
+
df['week'] = df['timestamp'].dt.isocalendar().week
|
471 |
+
weekly_engagement = df.groupby('week').agg({
|
472 |
+
'is_question': 'mean',
|
473 |
+
'shows_confusion': 'mean',
|
474 |
+
'is_followup': 'mean',
|
475 |
+
'sentiment': 'mean'
|
476 |
+
})
|
477 |
+
|
478 |
+
return {
|
479 |
+
'question_trend': stats.linregress(
|
480 |
+
range(len(weekly_engagement)),
|
481 |
+
weekly_engagement['is_question']
|
482 |
+
).slope,
|
483 |
+
'confusion_trend': stats.linregress(
|
484 |
+
range(len(weekly_engagement)),
|
485 |
+
weekly_engagement['shows_confusion']
|
486 |
+
).slope,
|
487 |
+
'follow_up_trend': stats.linregress(
|
488 |
+
range(len(weekly_engagement)),
|
489 |
+
weekly_engagement['is_followup']
|
490 |
+
).slope,
|
491 |
+
'sentiment_trend': stats.linregress(
|
492 |
+
range(len(weekly_engagement)),
|
493 |
+
weekly_engagement['sentiment']
|
494 |
+
).slope
|
495 |
+
}
|
496 |
+
|
497 |
+
def _generate_recommendations(self, report: Dict[str, Any]) -> List[str]:
|
498 |
+
"""Generate actionable recommendations based on the analysis."""
|
499 |
+
recommendations = []
|
500 |
+
|
501 |
+
# Analyze difficulty distribution
|
502 |
+
difficult_topics = [
|
503 |
+
topic for topic, data in report['topics'].items()
|
504 |
+
if data['insights'].difficulty_level in
|
505 |
+
(TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT)
|
506 |
+
]
|
507 |
+
|
508 |
+
if difficult_topics:
|
509 |
+
recommendations.append(
|
510 |
+
f"Consider providing additional resources for challenging topics: {', '.join(difficult_topics)}"
|
511 |
+
)
|
512 |
+
|
513 |
+
# Analyze student engagement
|
514 |
+
avg_engagement = np.mean([
|
515 |
+
progress['questions_asked'] / progress['total_messages']
|
516 |
+
for progress in report['student_progress'].values()
|
517 |
+
])
|
518 |
+
|
519 |
+
if avg_engagement < 0.3:
|
520 |
+
recommendations.append(
|
521 |
+
"Implement more interactive elements to increase student engagement"
|
522 |
+
)
|
523 |
+
|
524 |
+
# Analyze temporal patterns
|
525 |
+
peak_hours = list(report['temporal_analysis']['peak_activity_hours'].keys())
|
526 |
+
recommendations.append(
|
527 |
+
f"Consider scheduling additional support during peak activity hours: {peak_hours}"
|
528 |
+
)
|
529 |
+
|
530 |
+
# Analyze learning trends
|
531 |
+
# sentiment_trend = report['temporal_analysis']['learning_trends']['sentiment_trend']
|
532 |
+
# if sentiment_trend < 0:
|
533 |
+
# recommendations.append(
|
534 |
+
# "Review teaching approach to address declining student satisfaction"
|
535 |
+
# )
|
536 |
+
# Analyze learning trends
|
537 |
+
# Analyze learning trends
|
538 |
+
sentiment_trend = report.get('temporal_analysis', {}).get('learning_trends', {}).get('sentiment_trend', None)
|
539 |
+
if isinstance(sentiment_trend, (int, float)):
|
540 |
+
if sentiment_trend < 0:
|
541 |
+
recommendations.append(
|
542 |
+
"Review teaching approach to address declining student satisfaction"
|
543 |
+
)
|
544 |
+
elif isinstance(sentiment_trend, dict):
|
545 |
+
# Handle the case where sentiment_trend is a dictionary
|
546 |
+
print(f"Unexpected dict format for sentiment_trend: {sentiment_trend}")
|
547 |
+
else:
|
548 |
+
print(f"Unexpected type for sentiment_trend: {type(sentiment_trend)}")
|
549 |
+
|
550 |
+
return recommendations
|
551 |
+
|
552 |
+
class CustomJSONEncoder(json.JSONEncoder):
|
553 |
+
def default(self, obj):
|
554 |
+
if isinstance(obj, TopicDifficulty):
|
555 |
+
return obj.value
|
556 |
+
if isinstance(obj, TopicInsights):
|
557 |
+
return obj.to_dict()
|
558 |
+
if isinstance(obj, np.integer):
|
559 |
+
return int(obj)
|
560 |
+
if isinstance(obj, np.floating):
|
561 |
+
return float(obj)
|
562 |
+
if isinstance(obj, np.ndarray):
|
563 |
+
return obj.tolist()
|
564 |
+
if isinstance(obj, datetime):
|
565 |
+
return obj.isoformat()
|
566 |
+
return super().default(obj)
|
567 |
+
|
568 |
+
def convert_insights_to_dict(report):
|
569 |
+
for main_topic, data in report['topics'].items():
|
570 |
+
if isinstance(data['insights'], TopicInsights):
|
571 |
+
data['insights'] = data['insights'].to_dict()
|
572 |
+
for subtopic, subdata in data['subtopics'].items():
|
573 |
+
if isinstance(subdata['insights'], TopicInsights):
|
574 |
+
subdata['insights'] = subdata['insights'].to_dict()
|
575 |
+
|
576 |
+
if __name__ == "__main__":
|
577 |
+
# Load chat history data
|
578 |
+
chat_history = None
|
579 |
+
with open('sample_files/chat_history_corpus.json', 'r', encoding="utf-8") as file:
|
580 |
+
chat_history = json.load(file)
|
581 |
+
|
582 |
+
# Initialize analytics system
|
583 |
+
analytics = PreClassAnalytics()
|
584 |
+
|
585 |
+
# Generate comprehensive report
|
586 |
+
report = analytics.generate_comprehensive_report(chat_history)
|
587 |
+
|
588 |
+
# Convert insights to dictionary
|
589 |
+
# convert_insights_to_dict(report)
|
590 |
+
|
591 |
+
print(json.dumps(report, indent=4, cls=CustomJSONEncoder))
|
592 |
+
# print(report)
|
requirements.txt
ADDED
@@ -0,0 +1,37 @@
streamlit
pymongo
PyPDF2
python-docx
openai
google-generativeai
llama-index
werkzeug
numpy
pandas
plotly
scikit-learn
networkx
community
umap-learn
seaborn
matplotlib
scipy
Pillow
python-dotenv
zoomus
asyncio
google-auth-oauthlib
google-auth
transformers
textstat
spacy
streamlit_option_menu
beautifulsoup4
youtube-transcript-api
requests
xml==0.0.1
networkx==3.1
bokeh==3.2.1

scikit-learn==1.2.2
langchain==0.0.208
research22.py
ADDED
@@ -0,0 +1,517 @@
# if __name__ == "__main__":
#     main()
import streamlit as st
import google.generativeai as genai
from typing import Dict, Any
import PyPDF2
import io
from pymongo import MongoClient
from dotenv import load_dotenv
import os
import json
import re

# --------------------------------------------------------------------------------
# 1. Environment Setup
# --------------------------------------------------------------------------------
load_dotenv()
# MongoDB
MONGODB_URI = os.getenv(
    "MONGODB_UR",
    "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
)
# Gemini
GEMINI_KEY = os.getenv("GEMINI_KEY", "AIzaSyCFIvntck54HOCS5pxxiy9wpr5HJN3r02I")

# Configure Gemini
genai.configure(api_key=GEMINI_KEY)


# --------------------------------------------------------------------------------
# 2. Database Connection
# --------------------------------------------------------------------------------
def create_db_connection():
    """
    Create MongoDB connection and return the 'papers' collection.
    """
    try:
        client = MongoClient(MONGODB_URI)
        db = client["novascholar_db"]  # Database name
        collection = db["research_papers"]  # Collection name
        # Ping to confirm connection
        client.admin.command("ping")
        return db
    except Exception as e:
        st.error(f"Database connection error: {str(e)}")
        return None


# --------------------------------------------------------------------------------
# 3. PDF Text Extraction
# --------------------------------------------------------------------------------
def extract_text_from_pdf(pdf_file) -> str:
    """
    Extract all text from a PDF.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        text = ""
        for page in pdf_reader.pages:
            text += page.extract_text() + "\n"
        return text
    except Exception as e:
        st.error(f"Error processing PDF: {str(e)}")
        return ""


# --------------------------------------------------------------------------------
# 4. Gemini Response Helper
# --------------------------------------------------------------------------------
def get_gemini_response(prompt: str) -> str:
    """
    Sends a prompt to Google's Gemini model and returns the response text.
    Adjust this function as needed for your generative AI usage.
    """
    try:
        model = genai.GenerativeModel("gemini-pro")
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        st.error(f"Gemini API Error: {str(e)}")
        return ""


# --------------------------------------------------------------------------------
# 5. Basic Info Extraction
# --------------------------------------------------------------------------------
def extract_basic_info(text: str) -> Dict[str, str]:
    """
    Extract title, publication, journal/conference, abstract, keywords, author, and date from the paper text.
    Return a dictionary with these fields.
    """
    prompt = f"""
    Extract the following fields from the research paper text below:

    Title
    Publication
    Journal_Conference
    Abstract
    Keywords
    Author
    Date_of_Publication

    Paper text:
    {text}

    Return them in this format:
    Title: ...
    Publication: ...
    Journal_Conference: ...
    Abstract: ...
    Keywords: ...
    Author: ...
    Date_of_Publication: ...
    """
    response = get_gemini_response(prompt)
    if not response:
        return {}
    info = {}
    lines = response.split("\n")
    for line in lines:
        if ":" in line:
            key, value = line.split(":", 1)
            info[key.strip()] = value.strip()
    return info


# --------------------------------------------------------------------------------
# 6. Content Sections Extraction
# --------------------------------------------------------------------------------
def extract_content_sections(text: str) -> Dict[str, str]:
    """
    Extract expanded sections: Intro, Literature_Review, Research_Models_Used,
    Methodology, Discussion, Future_Scope, Theory.
    """
    prompt = f"""Please extract these sections from the research paper:
    1. Introduction
    2. Literature Review
    3. Research Models Used
    4. Methodology
    5. Discussion
    6. Future Scope
    7. Theory

    Paper text: {text}

    Return in this exact format without any additional text or explanations also make sure
    no data should be empty (at least 10-15 words) and it should be meaningful:
    Intro: <text>
    Literature_Review: <text>
    Research_Models_Used: <text>
    Methodology: <text>
    Discussion: <text>
    Future_Scope: <text>
    Theory: <text>
    """
    response = get_gemini_response(prompt)
    if not response:
        return {}
    sections = {}
    lines = response.split("\n")
    for line in lines:
        if ":" in line:
            key, value = line.split(":", 1)
            sections[key.strip()] = value.strip()
    return sections


# --------------------------------------------------------------------------------
# 7. Variables Extraction
# --------------------------------------------------------------------------------
def extract_variables(text: str) -> Dict[str, Any]:
    """
    Extract variable data: Independent_Variables, nof_Independent_Variables,
    Dependent_Variables, nof_Dependent_Variables, Control_Variables,
    Extraneous_Variables, nof_Control_Variables, nof_Extraneous_Variables
    """
    prompt = f"""From the paper text, extract the following fields:
    1. Independent_Variables
    2. nof_Independent_Variables
    3. Dependent_Variables
    4. nof_Dependent_Variables
    5. Control_Variables
    6. Extraneous_Variables
    7. nof_Control_Variables
    8. nof_Extraneous_Variables

    Return them in this format:
    Independent_Variables: <list>
    nof_Independent_Variables: <integer>
    Dependent_Variables: <list>
    nof_Dependent_Variables: <integer>
    Control_Variables: <list>
    Extraneous_Variables: <list>
    nof_Control_Variables: <integer>
    nof_Extraneous_Variables: <integer>

    Paper text: {text}
    """
    response = get_gemini_response(prompt)
    if not response:
        return {}
    variables = {}
    lines = response.split("\n")
    for line in lines:
        if ":" in line:
            key, value = line.split(":", 1)
            # Attempt to convert to integer where appropriate
            clean_key = key.strip()
            clean_value = value.strip()
            if clean_key.startswith("nof_"):
                try:
                    variables[clean_key] = int(clean_value)
                except ValueError:
                    # fallback if it's not an integer
                    variables[clean_key] = 0
            else:
                variables[clean_key] = clean_value
    return variables


# --------------------------------------------------------------------------------
# 8. Utility to ensure no empty fields (example logic)
# --------------------------------------------------------------------------------
def ensure_non_empty_values(data: Dict[str, Any], fallback_text: str) -> Dict[str, Any]:
    """
    Ensure each extracted field has meaningful content. If empty, fill with default text.
    """
    for k, v in data.items():
        if not v or len(str(v).split()) < 3:  # example check for minimal words
            data[k] = f"No sufficient data found for {k}. Could not parse."
    return data


# --------------------------------------------------------------------------------
# 9. Processing the Paper
# --------------------------------------------------------------------------------
# def process_paper(text: str) -> Dict[str, Any]:
#     """
#     Orchestrate calls to extract basic info, content sections, and variables.
#     Return a dictionary containing all the fields with consistent naming.
#     """
#     with st.spinner("Extracting basic information..."):
#         basic_info = extract_basic_info(text)
#         basic_info = ensure_non_empty_values(basic_info, text)
#
#     with st.spinner("Extracting content sections..."):
#         content_sections = extract_content_sections(text)
#         content_sections = ensure_non_empty_values(content_sections, text)
#
#     with st.spinner("Extracting variables..."):
#         variables_info = extract_variables(text)
#         variables_info = ensure_non_empty_values(variables_info, text)
#
#     # Create a single dictionary with all fields
#     paper_doc = {
#         "Title": basic_info.get("Title", ""),
#         "Publication": basic_info.get("Publication", ""),
#         "Journal_Conference": basic_info.get("Journal_Conference", ""),
#         "Abstract": basic_info.get("Abstract", ""),
#         "Keywords": basic_info.get("Keywords", ""),
#         "Author": basic_info.get("Author", ""),
#         "Date_of_Publication": basic_info.get("Date_of_Publication", ""),
#         "Intro": content_sections.get("Intro", ""),
#         "Literature_Review": content_sections.get("Literature_Review", ""),
#         "Research_Models_Used": content_sections.get("Research_Models_Used", ""),
#         "Methodology": content_sections.get("Methodology", ""),
#         "Discussion": content_sections.get("Discussion", ""),
#         "Future_Scope": content_sections.get("Future_Scope", ""),
#         "Theory": content_sections.get("Theory", ""),
#         "Independent_Variables": variables_info.get("Independent_Variables", ""),
#         "nof_Independent_Variables": variables_info.get("nof_Independent_Variables", 0),
#         "Dependent_Variables": variables_info.get("Dependent_Variables", ""),
#         "nof_Dependent_Variables": variables_info.get("nof_Dependent_Variables", 0),
#         "Control_Variables": variables_info.get("Control_Variables", ""),
#         "Extraneous_Variables": variables_info.get("Extraneous_Variables", ""),
#         "nof_Control_Variables": variables_info.get("nof_Control_Variables", 0),
#         "nof_Extraneous_Variables": variables_info.get("nof_Extraneous_Variables", 0),
#     }
#
#     return paper_doc

# filepath: /c:/Users/acer/OneDrive/Documents/GitHub/res-cor/research22.py
# ...existing code continues...

# --------------------------------------------------------------------------------
# 3. Paper Type Attributes
# --------------------------------------------------------------------------------
PAPER_TYPE_ATTRIBUTES = {
    "Review Based Paper": [
        "Title",
        "Publication",
        "Journal_Conference",
        "Abstract",
        "Keywords",
        "Author",
        "Date_of_Publication",
        "Intro",
        "Literature_Review",
        "Body",
        "Protocol",
        "Search String",
        "Included Studies",
        "Data Collection and Analysis Methods",
        "Data Extraction Table",
        "Synthesis and Analysis",
        "Conclusion",
        "Limitations",
        "Results",
        "References",
        "Risk of Bias Assessment",
    ],
    "Opinion/Perspective Based Paper": [
        "Title",
        "Publication",
        "Journal_Conference",
        "Abstract",
        "Keywords",
        "Author",
        "Date_of_Publication",
        "Intro",
        "Literature_Review",
        "Introduction",
        "Body",
        "Results and Discussion",
        "Conclusion",
        "References",
    ],
    "Empirical Research Paper": [
        "Title",
        "Publication",
        "Journal_Conference",
        "Abstract",
        "Keywords",
        "Author",
        "Date_of_Publication",
        "Intro",
        "Literature_Review",
        "Introduction",
        "Body",
        "Methodology",
        "Participants",
        "Survey Instrument",
        "Data Collection",
        "Data Analysis",
        "Results and Discussion",
        "Conclusion",
        "References",
    ],
    "Research Paper (Other)": [
        "Title",
        "Publication",
        "Journal_Conference",
        "Abstract",
        "Keywords",
        "Author",
        "Date_of_Publication",
        "Intro",
        "Literature_Review",
        "Research_Models_Used",
        "Methodology",
        "Discussion",
        "Future_Scope",
        "Theory",
        "Independent_Variables",
        "nof_Independent_Variables",
        "Dependent_Variables",
        "nof_Dependent_Variables",
        "Control_Variables",
        "Extraneous_Variables",
        "nof_Control_Variables",
        "nof_Extraneous_Variables",
    ],
}


# --------------------------------------------------------------------------------
# 4. Extract Paper Fields
# --------------------------------------------------------------------------------
def extract_paper_fields(text: str, paper_type: str) -> Dict[str, Any]:
    """
    Use Gemini to extract fields based on the paper type attributes,
    then return a dictionary of extracted fields.
    """
    if paper_type not in PAPER_TYPE_ATTRIBUTES:
        st.error("Invalid paper type selected.")
        return {}

    selected_attrs = PAPER_TYPE_ATTRIBUTES[paper_type]
    prompt = f"""
    Extract the following fields from the research paper text below:

    {", ".join(selected_attrs)}

    Paper text:
    {text}

    Return them in this JSON format strictly, with no extra text:
    [
        {{
            {", ".join([f'"{attr}": "value"' for attr in selected_attrs])}
        }}
    ]
    """

    try:
        response = get_gemini_response(prompt)
        if not response:
            st.error("No response from Gemini.")
            return {}

        # Clean up any text around JSON
        raw_text = response.strip()

        # Find start and end of JSON
        json_start = raw_text.find("[")
        json_end = raw_text.rfind("]") + 1
        json_str = raw_text[json_start:json_end]

        # Try removing trailing commas, extra quotes, etc.
        json_str = re.sub(r",\s*}", "}", json_str)
        json_str = re.sub(r",\s*\]", "]", json_str)

        try:
            data = json.loads(json_str)
        except json.JSONDecodeError as e:
            st.warning(f"Fixing JSON errors: {str(e)}")
            # As a last-resort attempt, remove anything after the last curly bracket
            bracket_pos = json_str.rfind("}")
            if bracket_pos != -1:
                json_str = json_str[: bracket_pos + 1]
            # Try again
            data = json.loads(json_str)

        if isinstance(data, list) and len(data) > 0:
            return data[0]
        else:
            st.error("Gemini did not return a valid JSON array.")
            return {}
    except Exception as e:
        st.error(f"Error in Gemini extraction: {str(e)}")
        return {}


# --------------------------------------------------------------------------------
# 5. Process Paper and Save
# --------------------------------------------------------------------------------
def process_paper(text: str, paper_type: str):
    """
    Extract paper fields based on paper type, then save to
    the corresponding MongoDB collection.
    """
    db = create_db_connection()
    if not db:
        return

    # Determine collection name
    collection_name = paper_type.replace(" ", "_").lower()
    collection = db[collection_name]

    # Extract fields
    extracted_data = extract_paper_fields(text, paper_type)
    if extracted_data:
        # Insert into MongoDB
        collection.insert_one(extracted_data)
        return extracted_data
    return {}


# --------------------------------------------------------------------------------
# 6. Streamlit UI for Paper Extraction
# --------------------------------------------------------------------------------
def main():
    # st.set_page_config(page_title="Extract Research Paper", layout="wide")
    st.title("Extract Research Paper")

    paper_type = st.selectbox(
        "Select type of research paper:",
        [
            "Review Based Paper",
            "Opinion/Perspective Based Paper",
            "Empirical Research Paper",
            "Research Paper (Other)",
        ],
    )

    uploaded_file = st.file_uploader("Upload a PDF or text file", type=["pdf", "txt"])

    if st.button("Extract & Save") and uploaded_file:
        try:
            # Read file content
            if uploaded_file.type == "application/pdf":
                pdf_reader = PyPDF2.PdfReader(uploaded_file)
                text_content = ""
                for page in pdf_reader.pages:
                    text_content += page.extract_text()
            else:
                text_content = uploaded_file.read().decode("utf-8", errors="replace")

            with st.spinner("Extracting fields..."):
                data = process_paper(text_content, paper_type)

            if data:
                st.success(
                    f"Paper extracted and saved to MongoDB in '{paper_type}' collection!"
                )
                st.write("Extracted fields:")
                st.json(data)

        except Exception as e:
            st.error(f"An error occurred: {str(e)}")


# ...existing code (if any)...

if __name__ == "__main__":
    main()
research3.py
ADDED
@@ -0,0 +1,110 @@
import streamlit as st
import pandas as pd
import requests
import json
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"


def call_perplexity_api(prompt: str) -> str:
    """Call Perplexity AI with a prompt, return the text response if successful."""
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }

    try:
        response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        st.error(f"API Error: {str(e)}")
        return ""


def generate_research_paper(df: pd.DataFrame, topic: str) -> dict:
    """
    For each column in the DataFrame, generate a research paper section (200-500 words)
    that addresses the data in that column on the given topic. Return a dict: column -> text.
    """
    paper_sections = {}
    for col in df.columns:
        # Convert all non-null rows in the column to strings and join them for context
        col_values = df[col].dropna().astype(str).tolist()
        # We'll truncate if there's a ton of text
        sample_text = " | ".join(col_values[:50])  # limit to first 50 rows for brevity

        prompt = f"""
        Topic: {topic}
        Column: {col}
        Data Samples: {sample_text}

        Generate a well-structured research paper section that addresses the topic above,
        referencing relevant information from the column data.
        The section should be at least 100 words and at most 150 words.
        Provide insights, examples, and possible research directions integrating the corpus data.
        """
        section_text = call_perplexity_api(prompt)
        paper_sections[col] = section_text.strip() if section_text else ""
    return paper_sections


def format_paper(paper_dict: dict, topic: str) -> str:
    """
    Format the generated paper into a Markdown string.
    Add the topic as the main title, each column name as a heading, and
    the corresponding text as paragraph content.
    """
    md_text = f"# Research Paper on: {topic}\n\n"
    for col, content in paper_dict.items():
        md_text += f"## {col}\n{content}\n\n"
    return md_text


def main():
    st.title("Topic + Corpus-Based Research Paper Generator")

    topic_input = st.text_input("Enter the topic for the research paper:")
    uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")

    if uploaded_file:
        df = pd.read_csv(uploaded_file)
        st.write("### Preview of Uploaded Data")
        st.dataframe(df.head())

        if st.button("Generate Research Paper"):
            if topic_input.strip():
                st.info("Generating paper based on the topic and the corpus columns...")
                with st.spinner("Calling Perplexity AI..."):
                    paper = generate_research_paper(df, topic_input)
                if paper:
                    formatted_paper = format_paper(paper, topic_input)
                    st.success("Research Paper Generated Successfully!")
                    st.write(formatted_paper)

                    st.download_button(
                        label="Download Paper as Markdown",
                        data=formatted_paper,
                        file_name="research_paper.md",
                        mime="text/markdown",
                    )
                else:
                    st.error(
                        "Paper generation failed. Please check Perplexity API key."
                    )
            else:
                st.warning("Please enter a valid topic.")


if __name__ == "__main__":
    main()
research_assistant_dashboard.py
ADDED
@@ -0,0 +1,349 @@
# import streamlit as st
# from openai import OpenAI
# import os
# from dotenv import load_dotenv
# from llama_index.core import (
#     VectorStoreIndex,
#     SimpleDirectoryReader,
#     Document,
#     GPTVectorStoreIndex,
# )
# from bson import ObjectId
# import requests
# import openai
# import numpy as np
# from pymongo import MongoClient
# from bson import ObjectId
# from datetime import datetime
# from llama_index.embeddings.openai import OpenAIEmbedding
# from typing import List, Dict

# # Initialize Perplexity API and OpenAI API
# load_dotenv()
# perplexity_api_key = os.getenv("PERPLEXITY_KEY")
# openai.api_key = os.getenv("OPENAI_KEY")

# # MongoDB setup
# MONGO_URI = os.getenv("MONGO_URI")
# client = MongoClient(MONGO_URI)
# db = client["novascholar_db"]
# research_papers_collection = db["research_papers"]


# def fetch_perplexity_data(api_key, topic):
#     """
#     Fetch research papers data from Perplexity API with proper formatting
#     """
#     headers = {
#         "accept": "application/json",
#         "content-type": "application/json",
#         "authorization": f"Bearer {api_key}",
#     }

#     # Structured prompt to get properly formatted response
#     messages = [
#         {
#             "role": "system",
#             "content": """You are a research paper retrieval expert. For the given topic, return exactly 10 research papers in the following format:
#             Title: Paper Title
#             Authors: Author 1, Author 2
#             Year: YYYY
#             Content: Detailed paper content with abstract and key findings
#             URL: DOI or paper URL
#             """,
#         },
#         {"role": "user", "content": f"Find 10 research papers about: {topic}"},
#     ]

#     try:
#         client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
#         response = client.chat.completions.create(
#             model="llama-3.1-sonar-small-128k-chat",  # Use the best Perplexity model
#             messages=messages,
#         )

#         # Extract and validate response
#         content = response.choices[0].message.content
#         st.write("Fetched Data:", content)  # Debugging line to check the fetched data

#         return content

#     except Exception as e:
#         st.error(f"Failed to fetch data from Perplexity API: {str(e)}")
#         return ""


# def split_and_vectorize_papers(content: str) -> List[Dict]:
#     """Split and vectorize papers using OpenAI embeddings"""
#     papers = content.split("\n\n")

#     # Initialize OpenAI client
#     # client = OpenAI()  # Uses api_key from environment variable
#     vectors = []

#     for paper in papers:
#         try:
#             # Get embedding using OpenAI's API directly
#             response = openai.embeddings.create(
#                 model="text-embedding-ada-002", input=paper, encoding_format="float"
#             )

#             # Extract embedding from response
#             embedding = response.data[0].embedding

#             vectors.append(
#                 {"content": paper, "vector": embedding, "timestamp": datetime.utcnow()}
#             )

#         except Exception as e:
#             st.error(f"Error vectorizing paper: {str(e)}")
#             continue

#     return vectors


# def store_papers_in_mongodb(papers):
#     """Store papers with vectors in MongoDB"""
#     try:
#         for paper in papers:
#             # Prepare MongoDB document
#             mongo_doc = {
#                 "content": paper["content"],
#                 "vector": paper["vector"],
#                 "created_at": datetime.utcnow(),
#             }

#             # Insert into MongoDB
#             db.papers.update_one(
#                 {"content": paper["content"]}, {"$set": mongo_doc}, upsert=True
#             )

#         st.success(f"Stored {len(papers)} papers in database")
#         return True
#     except Exception as e:
#         st.error(f"Error storing papers: {str(e)}")


# def get_research_papers(query):
#     """
#     Get and store research papers with improved error handling
#     """
#     # Fetch papers from Perplexity
#     content = fetch_perplexity_data(perplexity_api_key, query)

#     if not content:
#         return []

#     # Split and vectorize papers
#     papers = split_and_vectorize_papers(content)

#     # Store papers in MongoDB
#     if store_papers_in_mongodb(papers):
#         return papers
#     else:
#         st.warning("Failed to store papers in database, but returning fetched results")
#         return papers


# def analyze_research_gaps(papers):
#     """
#     Analyze research gaps with improved prompt and error handling
#     """
#     if not papers:
#         return "No papers provided for analysis"

#     # Prepare paper summaries for analysis
#     paper_summaries = "\n\n".join(
#         [
#             f"Key Findings: {paper['content'][:500]}..."
#             # f"Title: {paper['title']}\nYear: {paper['year']}\nKey Findings: {paper['content'][:500]}..."
#             for paper in papers
#         ]
#     )

#     headers = {
#         "Authorization": f"Bearer {perplexity_api_key}",
#         "Content-Type": "application/json",
#     }

#     data = {
#         "messages": [
#             {
#                 "role": "system",
#                 "content": "You are a research analysis expert. Identify specific research gaps and future research directions based on the provided papers. Format your response with clear sections: Current State, Identified Gaps, and Future Directions.",
#             },
#             {
#                 "role": "user",
#                 "content": f"Analyze these papers and identify research gaps:\n\n{paper_summaries}",
#             },
#         ]
#     }

#     try:
#         client = OpenAI(
#             api_key=perplexity_api_key, base_url="https://api.perplexity.ai"
#         )
#         response = client.chat.completions.create(
#             model="llama-3.1-sonar-small-128k-chat",  # Use the best Perplexity model
#             messages=data["messages"],
#         )
#         return response.choices[0].message.content

#     except Exception as e:
#         st.error(f"Failed to analyze research gaps: {str(e)}")
#         return "Error analyzing research gaps"


# def create_research_paper(gaps, topic, papers):
#     """
#     Create a research paper that addresses the identified gaps using Perplexity API
#     """
#     full_texts = "\n\n".join([paper["content"] for paper in papers])
#     headers = {
#         "Authorization": f"Bearer {perplexity_api_key}",
#         "Content-Type": "application/json",
#     }
#     data = {
#         "messages": [
#             {
#                 "role": "system",
#                 "content": "You are a research paper generation expert. Create a comprehensive research paper that addresses the identified gaps based on the provided papers. Format your response with clear sections: Introduction, Literature Review, Methodology, Results, Discussion, Conclusion, and References.",
#             },
#             {
#                 "role": "user",
#                 "content": f"Create a research paper on the topic '{topic}' that addresses the following research gaps:\n\n{gaps}\n\nBased on the following papers:\n\n{full_texts}",
#             },
#         ]
#     }
#     try:
#         client = OpenAI(
#             api_key=perplexity_api_key, base_url="https://api.perplexity.ai"
#         )
#         response = client.chat.completions.create(
#             model="llama-3.1-sonar-small-128k-chat",  # Use the best Perplexity model
#             messages=data["messages"],
#         )
#         return response.choices[0].message.content

#     except Exception as e:
#         st.error(f"Failed to create research paper: {str(e)}")
#         return "Error creating research paper"


# def cosine_similarity(vec1, vec2):
#     """Calculate the cosine similarity between two vectors"""
#     vec1 = np.array(vec1)
#     vec2 = np.array(vec2)
#     return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))


# def calculate_cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
#     """Calculate cosine similarity between two vectors"""
#     return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))


# def display_research_assistant_dashboard():
#     """Display research assistant dashboard"""
#     # Initialize session state for recommendations
#     if "recommendations" not in st.session_state:
#         st.session_state.recommendations = None
#     if "vectors" not in st.session_state:
#         st.session_state.vectors = None
#     if "generated_paper" not in st.session_state:
#         st.session_state.generated_paper = None

#     # Sidebar
#     with st.sidebar:
#         st.title(f"Welcome, {st.session_state.username}")
#         if st.button("Logout", use_container_width=True):
#             for key in st.session_state.keys():
#                 del st.session_state[key]
#             st.rerun()

#     # Main content
#     st.title("Research Paper Recommendations")
#     search_query = st.text_input("Enter research topic:")
#     col1, col2 = st.columns(2)
#     with col1:
#         if st.button("Get Research Papers"):
#             if search_query:
#                 with st.spinner("Fetching recommendations..."):
#                     st.session_state.recommendations = get_research_papers(search_query)
#                     st.session_state.vectors = [
#                         paper["vector"] for paper in st.session_state.recommendations
#                     ]
#                     st.markdown(
#                         "\n\n".join(
#                             [
#                                 f"**{i+1}.**\n{paper['content']}"
#                                 # f"**{i+1}. {paper['title']}**\n{paper['content']}"
#                                 for i, paper in enumerate(
#                                     st.session_state.recommendations
#                                 )
#                             ]
#                         )
#                     )
#             else:
#                 st.warning("Please enter a search query")
#     with col2:
#         if st.button("Analyze Research Gaps"):
#             if st.session_state.recommendations:
#                 with st.spinner("Analyzing research gaps..."):
#                     gaps = analyze_research_gaps(st.session_state.recommendations)
#                     st.session_state.generated_paper = create_research_paper(
#                         gaps, search_query, st.session_state.recommendations
#                     )
#                     st.markdown("### Potential Research Gaps")
#                     st.markdown(gaps)
#             else:
#                 st.warning("Please get research papers first")

#     if st.button("Save and Vectorize"):
#         if st.session_state.generated_paper:
#             try:
#                 # Initialize OpenAI client

#                 # Get embedding for generated paper
#                 response = openai.embeddings.create(
#                     model="text-embedding-ada-002",
#                     input=st.session_state.generated_paper,
#                     encoding_format="float",
#                 )
#                 generated_vector = response.data[0].embedding

#                 # Calculate similarities with stored vectors
#                 similarities = [
#                     calculate_cosine_similarity(generated_vector, paper_vector)
#                     for paper_vector in st.session_state.vectors
#                 ]

#                 # Display results
#                 st.markdown("### Generated Research Paper")
#                 st.markdown(st.session_state.generated_paper)

#                 st.markdown("### Cosine Similarities with Original Papers")
#                 for i, similarity in enumerate(similarities):
#                     st.metric(
#                         f"Paper {i+1}",
#                         value=f"{similarity:.3f}",
#                         help="Cosine similarity (1.0 = identical, 0.0 = completely different)",
#                     )

#             except Exception as e:
#                 st.error(f"Error during vectorization: {str(e)}")
#         else:
#             st.warning("Please analyze research gaps first")


# # Run the dashboard
# if __name__ == "__main__":
#     display_research_assistant_dashboard()

import research_combine2
# if __name__ == "__main__":
#     display_research_assistant_dashboard()
def display_research_assistant_dashboard():
    research_combine2.display_research_assistant_dashboard()
research_combine.py
ADDED
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
import json
|
4 |
+
import requests
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
from pymongo import MongoClient
|
7 |
+
from typing import Dict, Any
|
8 |
+
|
9 |
+
# Load environment variables
|
10 |
+
load_dotenv()
|
11 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
12 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
13 |
+
MONGODB_URI = os.getenv(
|
14 |
+
"MONGODB_UR",
|
15 |
+
"mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
|
16 |
+
)
|
17 |
+
|
18 |
+
# MongoDB setup
|
19 |
+
client = MongoClient(MONGODB_URI)
|
20 |
+
db = client["novascholar_db"]
|
21 |
+
collection = db["research_papers"]
|
22 |
+
|
23 |
+
|
24 |
+
def search_papers(topic: str, num_papers: int) -> str:
|
25 |
+
headers = {
|
26 |
+
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
27 |
+
"Content-Type": "application/json",
|
28 |
+
}
|
29 |
+
|
30 |
+
prompt = f"""Find {num_papers} recent research papers about {topic}.
|
31 |
+
Return ONLY a valid JSON array with the following structure for each paper, no additional text:
|
32 |
+
[
|
33 |
+
{{
|
34 |
+
"Title": "paper title",
|
35 |
+
"Publication": "publication name",
|
36 |
+
"Journal_Conference": "venue name",
|
37 |
+
"Abstract": "abstract text",
|
38 |
+
"Keywords": "key terms",
|
39 |
+
"Author": "author names",
|
40 |
+
"Date_of_Publication": "publication date",
|
41 |
+
"Intro": "introduction summary",
|
42 |
+
"Literature_Review": "literature review summary",
|
43 |
+
"Research_Models_Used": "models description",
|
44 |
+
"Methodology": "methodology description",
|
45 |
+
"Discussion": "discussion summary",
|
46 |
+
"Future_Scope": "future work",
|
47 |
+
"Theory": "theoretical framework",
|
48 |
+
"Independent_Variables": "list of variables",
|
49 |
+
"nof_Independent_Variables": 0,
|
50 |
+
"Dependent_Variables": "list of variables",
|
51 |
+
"nof_Dependent_Variables": 0,
|
52 |
+
"Control_Variables": "list of variables",
|
53 |
+
"nof_Control_Variables": 0,
|
54 |
+
"Extraneous_Variables": "list of variables",
|
55 |
+
"nof_Extraneous_Variables": 0
|
56 |
+
}}
|
57 |
+
]"""
|
58 |
+
|
59 |
+
payload = {
|
60 |
+
"model": "llama-3.1-sonar-small-128k-chat",
|
61 |
+
"messages": [
|
62 |
+
{
|
63 |
+
"role": "system",
|
64 |
+
"content": "You are a research paper analyzer that returns only valid JSON arrays.",
|
65 |
+
},
|
66 |
+
{"role": "user", "content": prompt},
|
67 |
+
],
|
68 |
+
"temperature": 0.1,
|
69 |
+
}
|
70 |
+
|
71 |
+
try:
|
72 |
+
response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
|
73 |
+
response.raise_for_status()
|
74 |
+
content = response.json()["choices"][0]["message"]["content"]
|
75 |
+
|
76 |
+
# Clean response and ensure it's valid JSON
|
77 |
+
content = content.strip()
|
78 |
+
if not content.startswith("["):
|
79 |
+
content = content[content.find("[") :]
|
80 |
+
if not content.endswith("]"):
|
81 |
+
content = content[: content.rfind("]") + 1]
|
82 |
+
|
83 |
+
# Validate JSON
|
84 |
+
papers = json.loads(content)
|
85 |
+
if not isinstance(papers, list):
|
86 |
+
raise ValueError("Response is not a JSON array")
|
87 |
+
|
88 |
+
# Insert into MongoDB
|
89 |
+
if papers:
|
90 |
+
collection.insert_many(papers)
|
91 |
+
return content
|
92 |
+
return "[]"
|
93 |
+
|
94 |
+
except json.JSONDecodeError as e:
|
95 |
+
st.error(f"Invalid JSON response: {str(e)}")
|
96 |
+
return None
|
97 |
+
except Exception as e:
|
98 |
+
st.error(f"Error: {str(e)}")
|
99 |
+
return None
|
100 |
+
|
101 |
+
|
102 |
+
import research22
|
103 |
+
import keywords_database_download
|
104 |
+
import new_keywords
|
105 |
+
import infranew
|
106 |
+
import loldude
|
107 |
+
import new_research_paper
|
108 |
+
import research3
|
109 |
+
import entire_download
|
110 |
+
|
111 |
+
|
112 |
+
def main():
|
113 |
+
st.set_page_config(page_title="Research Papers", layout="wide")
|
114 |
+
|
115 |
+
st.title("Research Papers")
|
116 |
+
|
117 |
+
# Sidebar radio
|
118 |
+
option = st.sidebar.radio(
|
119 |
+
"Select an option",
|
120 |
+
[
|
121 |
+
"Search Papers",
|
122 |
+
"Upload Paper",
|
123 |
+
"Single Keyword Search",
|
124 |
+
"Multiple Keywords Search",
|
125 |
+
"Knowledge Graph",
|
126 |
+
"Cosine Similarity",
|
127 |
+
"Paper Generator",
|
128 |
+
"Paper from Topic",
|
129 |
+
"Download Entire Corpus",
|
130 |
+
],
|
131 |
+
)
|
132 |
+
|
133 |
+
if option == "Search Papers":
|
134 |
+
st.subheader("Search and Store Papers")
|
135 |
+
|
136 |
+
topic = st.text_input("Enter research topic")
|
137 |
+
num_papers = st.number_input(
|
138 |
+
"Number of papers", min_value=1, max_value=10, value=5
|
139 |
+
)
|
140 |
+
|
141 |
+
if st.button("Search and Store"):
|
142 |
+
if topic:
|
143 |
+
with st.spinner(f"Searching and storing papers about {topic}..."):
|
144 |
+
results = search_papers(topic, num_papers)
|
145 |
+
if results:
|
146 |
+
st.success(
|
147 |
+
f"Successfully stored {num_papers} papers in MongoDB"
|
148 |
+
)
|
149 |
+
# Display results
|
150 |
+
papers = json.loads(results)
|
151 |
+
for paper in papers:
|
152 |
+
with st.expander(paper["Title"]):
|
153 |
+
for key, value in paper.items():
|
154 |
+
if key != "Title":
|
155 |
+
st.write(f"**{key}:** {value}")
|
156 |
+
else:
|
157 |
+
st.warning("Please enter a research topic")
|
158 |
+
|
159 |
+
# Add MongoDB connection status
|
160 |
+
if st.sidebar.button("Check Database Connection"):
|
161 |
+
try:
|
162 |
+
client.admin.command("ping")
|
163 |
+
print(MONGODB_URI)
|
164 |
+
st.sidebar.success("Connected to MongoDB")
|
165 |
+
except Exception as e:
|
166 |
+
st.sidebar.error(f"MongoDB Connection Error: {str(e)}")
|
167 |
+
elif option == "Single Keyword Search":
|
168 |
+
keywords_database_download.main()
|
169 |
+
elif option == "Multiple Keywords Search":
|
170 |
+
new_keywords.main()
|
171 |
+
elif option == "Knowledge Graph":
|
172 |
+
infranew.main()
|
173 |
+
elif option == "Cosine Similarity":
|
174 |
+
loldude.main()
|
175 |
+
elif option == "Paper Generator":
|
176 |
+
new_research_paper.main()
|
177 |
+
elif option == "Paper from Topic":
|
178 |
+
research3.main()
|
179 |
+
elif option == "Download Entire Corpus":
|
180 |
+
entire_download.main()
|
181 |
+
else:
|
182 |
+
# st.subheader("Blank Page")
|
183 |
+
# st.write("This is a placeholder for alternative content.")
|
184 |
+
research22.main()
|
185 |
+
|
186 |
+
|
187 |
+
if __name__ == "__main__":
|
188 |
+
main()
|
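A minimal, illustrative sketch of the JSON-array cleanup that search_papers above applies to the Perplexity response; the helper name and sample string below are invented for illustration.

import json

def extract_json_array(content: str) -> list:
    # Mirror of the cleanup in search_papers: keep only the [...] portion, then parse it.
    content = content.strip()
    start, end = content.find("["), content.rfind("]")
    if start == -1 or end == -1:
        raise ValueError("No JSON array found in model output")
    return json.loads(content[start : end + 1])

print(extract_json_array('Sure, here you go: [{"Title": "An Example Paper"}] Hope this helps!'))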
research_combine2.py
ADDED
@@ -0,0 +1,269 @@
1 |
+
import new_research_paper
|
2 |
+
import research3
|
3 |
+
import entire_download
|
4 |
+
import streamlit as st
|
5 |
+
import os
|
6 |
+
import json
|
7 |
+
import requests
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
from pymongo import MongoClient
|
10 |
+
from typing import Dict, Any
|
11 |
+
import research22
|
12 |
+
import keywords_database_download
|
13 |
+
import new_keywords
|
14 |
+
import infranew
|
15 |
+
import loldude
|
16 |
+
import new_research_paper
|
17 |
+
import research3
|
18 |
+
import entire_download
|
19 |
+
import sciclone
|
20 |
+
import extract
|
21 |
+
|
22 |
+
# Load environment variables
|
23 |
+
load_dotenv()
|
24 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
25 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
26 |
+
MONGODB_URI = os.getenv(
|
27 |
+
"MONGODB_UR",
|
28 |
+
"mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
|
29 |
+
)
|
30 |
+
|
31 |
+
# MongoDB setup
|
32 |
+
client = MongoClient(MONGODB_URI)
|
33 |
+
db = client["novascholar_db"]
|
34 |
+
|
35 |
+
|
36 |
+
def search_papers(topic: str, num_papers: int, paper_type: str) -> str:
|
37 |
+
headers = {
|
38 |
+
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
39 |
+
"Content-Type": "application/json",
|
40 |
+
}
|
41 |
+
|
42 |
+
attributes = {
|
43 |
+
"Review Based Paper": [
|
44 |
+
"Title",
|
45 |
+
"Publication",
|
46 |
+
"Journal_Conference",
|
47 |
+
"Abstract",
|
48 |
+
"Keywords",
|
49 |
+
"Author",
|
50 |
+
"Date_of_Publication",
|
51 |
+
"Intro",
|
52 |
+
"Literature_Review",
|
53 |
+
"Body",
|
54 |
+
"Protocol",
|
55 |
+
"Search String",
|
56 |
+
"Included Studies",
|
57 |
+
"Data Collection and Analysis Methods",
|
58 |
+
"Data Extraction Table",
|
59 |
+
"Synthesis and Analysis",
|
60 |
+
"Conclusion",
|
61 |
+
"Limitations",
|
62 |
+
"Results",
|
63 |
+
"References",
|
64 |
+
"Risk of Bias Assessment",
|
65 |
+
],
|
66 |
+
"Opinion/Perspective Based Paper": [
|
67 |
+
"Title",
|
68 |
+
"Publication",
|
69 |
+
"Journal_Conference",
|
70 |
+
"Abstract",
|
71 |
+
"Keywords",
|
72 |
+
"Author",
|
73 |
+
"Date_of_Publication",
|
74 |
+
"Intro",
|
75 |
+
"Literature_Review",
|
76 |
+
"Introduction",
|
77 |
+
"Body",
|
78 |
+
"Results and Discussion",
|
79 |
+
"Conclusion",
|
80 |
+
"References",
|
81 |
+
],
|
82 |
+
"Empirical Research Paper": [
|
83 |
+
"Title",
|
84 |
+
"Publication",
|
85 |
+
"Journal_Conference",
|
86 |
+
"Abstract",
|
87 |
+
"Keywords",
|
88 |
+
"Author",
|
89 |
+
"Date_of_Publication",
|
90 |
+
"Intro",
|
91 |
+
"Literature_Review",
|
92 |
+
"Introduction",
|
93 |
+
"Body",
|
94 |
+
"Methodology",
|
95 |
+
"Participants",
|
96 |
+
"Survey Instrument",
|
97 |
+
"Data Collection",
|
98 |
+
"Data Analysis",
|
99 |
+
"Results and Discussion",
|
100 |
+
"Conclusion",
|
101 |
+
"References",
|
102 |
+
],
|
103 |
+
"Research Paper (Other)": [
|
104 |
+
"Title",
|
105 |
+
"Publication",
|
106 |
+
"Journal_Conference",
|
107 |
+
"Abstract",
|
108 |
+
"Keywords",
|
109 |
+
"Author",
|
110 |
+
"Date_of_Publication",
|
111 |
+
"Intro",
|
112 |
+
"Literature_Review",
|
113 |
+
"Research_Models_Used",
|
114 |
+
"Methodology",
|
115 |
+
"Discussion",
|
116 |
+
"Future_Scope",
|
117 |
+
"Theory",
|
118 |
+
"Independent_Variables",
|
119 |
+
"nof_Independent_Variables",
|
120 |
+
"Dependent_Variables",
|
121 |
+
"nof_Dependent_Variables",
|
122 |
+
"Control_Variables",
|
123 |
+
"Extraneous_Variables",
|
124 |
+
"nof_Control_Variables",
|
125 |
+
"nof_Extraneous_Variables",
|
126 |
+
],
|
127 |
+
}
|
128 |
+
|
129 |
+
selected_attributes = attributes[paper_type]
|
130 |
+
prompt = f"""Find {num_papers} recent research papers about {topic}.
|
131 |
+
Return ONLY a valid JSON array with the following structure for each paper, no additional text:
|
132 |
+
[{{
|
133 |
+
{", ".join([f'"{attr}": "value"' for attr in selected_attributes])}
|
134 |
+
}}]"""
|
135 |
+
|
136 |
+
payload = {
|
137 |
+
"model": "llama-3.1-sonar-small-128k-chat",
|
138 |
+
"messages": [
|
139 |
+
{
|
140 |
+
"role": "system",
|
141 |
+
"content": "You are a research paper analyzer that returns only valid JSON arrays.",
|
142 |
+
},
|
143 |
+
{"role": "user", "content": prompt},
|
144 |
+
],
|
145 |
+
"temperature": 0.1,
|
146 |
+
}
|
147 |
+
|
148 |
+
try:
|
149 |
+
response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
|
150 |
+
response.raise_for_status()
|
151 |
+
content = response.json()["choices"][0]["message"]["content"]
|
152 |
+
|
153 |
+
# Clean response and ensure it's valid JSON
|
154 |
+
content = content.strip()
|
155 |
+
if not content.startswith("["):
|
156 |
+
content = content[content.find("[") :]
|
157 |
+
if not content.endswith("]"):
|
158 |
+
content = content[: content.rfind("]") + 1]
|
159 |
+
|
160 |
+
# Validate JSON
|
161 |
+
papers = json.loads(content)
|
162 |
+
if not isinstance(papers, list):
|
163 |
+
raise ValueError("Response is not a JSON array")
|
164 |
+
|
165 |
+
# Insert into MongoDB
|
166 |
+
collection = db[paper_type.replace(" ", "_").lower()]
|
167 |
+
if papers:
|
168 |
+
collection.insert_many(papers)
|
169 |
+
return content
|
170 |
+
return "[]"
|
171 |
+
|
172 |
+
except json.JSONDecodeError as e:
|
173 |
+
st.error(f"Invalid JSON response: {str(e)}")
|
174 |
+
return None
|
175 |
+
except Exception as e:
|
176 |
+
st.error(f"Error: {str(e)}")
|
177 |
+
return None
|
178 |
+
|
179 |
+
|
180 |
+
def display_research_assistant_dashboard():
|
181 |
+
#st.set_page_config(page_title="Research Papers", layout="wide")
|
182 |
+
|
183 |
+
# st.title("Research Papers")
|
184 |
+
|
185 |
+
# Sidebar radio
|
186 |
+
option = st.sidebar.radio(
|
187 |
+
"Select an option",
|
188 |
+
[
|
189 |
+
"Search Papers",
|
190 |
+
"Upload Paper",
|
191 |
+
"Single Keyword Search",
|
192 |
+
"Multiple Keywords Search",
|
193 |
+
"Knowledge Graph",
|
194 |
+
"Cosine Similarity",
|
195 |
+
"Paper Generator",
|
196 |
+
"Paper from Topic",
|
197 |
+
"Download Entire Corpus",
|
198 |
+
"Research Copilot",
|
199 |
+
"Research Paper Analysis Tool",
|
200 |
+
],
|
201 |
+
)
|
202 |
+
|
203 |
+
if option == "Search Papers":
|
204 |
+
st.subheader("Search and Store Papers")
|
205 |
+
|
206 |
+
topic = st.text_input("Enter research topic")
|
207 |
+
num_papers = st.number_input(
|
208 |
+
"Number of papers", min_value=1, max_value=10, value=5
|
209 |
+
)
|
210 |
+
paper_type = st.selectbox(
|
211 |
+
"Select type of research paper",
|
212 |
+
[
|
213 |
+
"Review Based Paper",
|
214 |
+
"Opinion/Perspective Based Paper",
|
215 |
+
"Empirical Research Paper",
|
216 |
+
"Research Paper (Other)",
|
217 |
+
],
|
218 |
+
)
|
219 |
+
|
220 |
+
if st.button("Search and Store"):
|
221 |
+
if topic:
|
222 |
+
with st.spinner(f"Searching and storing papers about {topic}..."):
|
223 |
+
results = search_papers(topic, num_papers, paper_type)
|
224 |
+
if results:
|
225 |
+
st.success(
|
226 |
+
f"Successfully stored {num_papers} papers in MongoDB"
|
227 |
+
)
|
228 |
+
# Display results
|
229 |
+
papers = json.loads(results)
|
230 |
+
for paper in papers:
|
231 |
+
with st.expander(paper["Title"]):
|
232 |
+
for key, value in paper.items():
|
233 |
+
if key != "Title":
|
234 |
+
st.write(f"**{key}:** {value}")
|
235 |
+
else:
|
236 |
+
st.warning("Please enter a research topic")
|
237 |
+
|
238 |
+
# Add MongoDB connection status
|
239 |
+
if st.sidebar.button("Check Database Connection"):
|
240 |
+
try:
|
241 |
+
client.admin.command("ping")
|
242 |
+
print(MONGODB_URI)
|
243 |
+
st.sidebar.success("Connected to MongoDB")
|
244 |
+
except Exception as e:
|
245 |
+
st.sidebar.error(f"MongoDB Connection Error: {str(e)}")
|
246 |
+
elif option == "Single Keyword Search":
|
247 |
+
keywords_database_download.main()
|
248 |
+
elif option == "Multiple Keywords Search":
|
249 |
+
new_keywords.main()
|
250 |
+
elif option == "Knowledge Graph":
|
251 |
+
infranew.main()
|
252 |
+
elif option == "Cosine Similarity":
|
253 |
+
loldude.main()
|
254 |
+
elif option == "Paper Generator":
|
255 |
+
new_research_paper.main()
|
256 |
+
elif option == "Paper from Topic":
|
257 |
+
research3.main()
|
258 |
+
elif option == "Download Entire Corpus":
|
259 |
+
entire_download.main()
|
260 |
+
elif option == "Research Copilot":
|
261 |
+
sciclone.main()
|
262 |
+
elif option == "Research Paper Analysis Tool":
|
263 |
+
extract.main()
|
264 |
+
else:
|
265 |
+
research22.main()
|
266 |
+
|
267 |
+
|
268 |
+
if __name__ == "__main__":
|
269 |
+
display_research_assistant_dashboard()
|
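A short sketch of how search_papers in research_combine2.py assembles its per-type prompt body and its MongoDB collection name; the attribute subset below is shortened for illustration (the full lists are defined in the file).

selected_attributes = ["Title", "Abstract", "Keywords"]  # illustrative subset
body = ", ".join(f'"{attr}": "value"' for attr in selected_attributes)
prompt = f"Return ONLY a valid JSON array, e.g. [{{{body}}}]"
print(prompt)

paper_type = "Review Based Paper"
print(paper_type.replace(" ", "_").lower())  # collection name: review_based_paper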
sciclone.py
ADDED
@@ -0,0 +1,466 @@
1 |
+
import streamlit as st
|
2 |
+
import requests
|
3 |
+
import PyPDF2
|
4 |
+
from typing import Optional, Dict, List
|
5 |
+
import json
|
6 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
+
from concurrent.futures import ThreadPoolExecutor
|
8 |
+
import xml.etree.ElementTree as ET
|
9 |
+
import re
|
10 |
+
from datetime import datetime
|
11 |
+
import time
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
import os
|
14 |
+
import pandas as pd
|
15 |
+
|
16 |
+
# Load environment variables
|
17 |
+
load_dotenv()
|
18 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
19 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
20 |
+
SAPLING_API_KEY = os.getenv("SAPLING_API_KEY")
|
21 |
+
|
22 |
+
|
23 |
+
def call_perplexity_api(prompt: str) -> str:
|
24 |
+
"""Call Perplexity AI with a prompt, return the text response if successful."""
|
25 |
+
headers = {
|
26 |
+
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
27 |
+
"Content-Type": "application/json",
|
28 |
+
}
|
29 |
+
|
30 |
+
payload = {
|
31 |
+
"model": "llama-3.1-sonar-small-128k-chat",
|
32 |
+
"messages": [{"role": "user", "content": prompt}],
|
33 |
+
"temperature": 0.3,
|
34 |
+
}
|
35 |
+
|
36 |
+
try:
|
37 |
+
response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
|
38 |
+
response.raise_for_status()
|
39 |
+
return response.json()["choices"][0]["message"]["content"]
|
40 |
+
except Exception as e:
|
41 |
+
st.error(f"API Error: {str(e)}")
|
42 |
+
return ""
|
43 |
+
|
44 |
+
|
45 |
+
def extract_text_from_pdf(pdf_file):
|
46 |
+
"""Extract text content from a PDF file."""
|
47 |
+
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
48 |
+
text = ""
|
49 |
+
for page in pdf_reader.pages:
|
50 |
+
text += page.extract_text() + "\n"
|
51 |
+
return text
|
52 |
+
|
53 |
+
|
54 |
+
def analyze_paper(text: str, category: str) -> str:
|
55 |
+
"""Generate a prompt and get analysis for a specific category."""
|
56 |
+
prompts = {
|
57 |
+
"Summarized Abstract": "Extract and summarize the abstract from this research paper:",
|
58 |
+
"Results": "What are the main results and findings from this research paper:",
|
59 |
+
"Summarized Introduction": "Summarize the introduction section of this research paper:",
|
60 |
+
"Methods Used": "What are the main methods and methodologies used in this research:",
|
61 |
+
"Literature Survey": "Summarize the literature review or related work from this paper:",
|
62 |
+
"Limitations": "What are the limitations mentioned in this research:",
|
63 |
+
"Contributions": "What are the main contributions of this research:",
|
64 |
+
"Practical Implications": "What are the practical implications of this research:",
|
65 |
+
"Objectives": "What are the main objectives of this research:",
|
66 |
+
"Findings": "What are the key findings from this research:",
|
67 |
+
"Future Research": "What future research directions are suggested in this paper:",
|
68 |
+
"Dependent Variables": "What are the dependent variables studied in this research:",
|
69 |
+
"Independent Variables": "What are the independent variables studied in this research:",
|
70 |
+
"Dataset": "What dataset(s) were used in this research:",
|
71 |
+
"Problem Statement": "What is the main problem statement or research question:",
|
72 |
+
"Challenges": "What challenges were faced or addressed in this research:",
|
73 |
+
"Applications": "What are the potential applications of this research:",
|
74 |
+
}
|
75 |
+
|
76 |
+
prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}" # Limit text to avoid token limits
|
77 |
+
return call_perplexity_api(prompt)
|
78 |
+
|
79 |
+
|
80 |
+
class ResearchAssistant:
|
81 |
+
def __init__(self, perplexity_key: str):
|
82 |
+
self.perplexity_key = perplexity_key
|
83 |
+
|
84 |
+
def chat_with_pdf(self, pdf_text: str, query: str) -> Dict:
|
85 |
+
chunks = self._split_text(pdf_text)
|
86 |
+
relevant_chunks = self._get_relevant_chunks(chunks, query)
|
87 |
+
|
88 |
+
prompt = f"Context from PDF:\n\n{relevant_chunks}\n\nQuestion: {query}"
|
89 |
+
response_text = call_perplexity_api(prompt)
|
90 |
+
return {"choices": [{"message": {"content": response_text}}]}
|
91 |
+
|
92 |
+
def generate_literature_review(self, topic: str) -> Dict:
|
93 |
+
try:
|
94 |
+
# Search arXiv for papers
|
95 |
+
papers = self._search_arxiv(topic)
|
96 |
+
if not papers:
|
97 |
+
return {"error": "No papers found on the topic"}
|
98 |
+
|
99 |
+
# Format paper information
|
100 |
+
papers_summary = "\n\n".join(
|
101 |
+
[
|
102 |
+
f"Paper: {p['title']}\nAuthors: {', '.join(p['authors'])}\nSummary: {p['summary']}"
|
103 |
+
for p in papers
|
104 |
+
]
|
105 |
+
)
|
106 |
+
|
107 |
+
prompt = f"""Generate a comprehensive literature review on '{topic}'. Based on these papers:
|
108 |
+
|
109 |
+
{papers_summary}
|
110 |
+
|
111 |
+
Structure the review as follows:
|
112 |
+
1. Introduction and Background
|
113 |
+
2. Current Research Trends
|
114 |
+
3. Key Findings and Themes
|
115 |
+
4. Research Gaps
|
116 |
+
5. Future Directions"""
|
117 |
+
|
118 |
+
response_text = call_perplexity_api(prompt)
|
119 |
+
return {"choices": [{"message": {"content": response_text}}]}
|
120 |
+
except Exception as e:
|
121 |
+
return {"error": f"Literature review generation failed: {str(e)}"}
|
122 |
+
|
123 |
+
def ai_writer(self, outline: str, references: List[str]) -> Dict:
|
124 |
+
prompt = f"""Write a research paper following this structure:
|
125 |
+
|
126 |
+
Outline:
|
127 |
+
{outline}
|
128 |
+
|
129 |
+
References to incorporate:
|
130 |
+
{json.dumps(references)}
|
131 |
+
|
132 |
+
Instructions:
|
133 |
+
- Follow academic writing style
|
134 |
+
- Include appropriate citations
|
135 |
+
- Maintain logical flow
|
136 |
+
- Include introduction and conclusion"""
|
137 |
+
|
138 |
+
response_text = call_perplexity_api(prompt)
|
139 |
+
return {"choices": [{"message": {"content": response_text}}]}
|
140 |
+
|
141 |
+
def refine_response(self, response: str, column: str) -> str:
|
142 |
+
prompt = f"""Refine the following response to fit the '{column}' column in a research paper CSV format:
|
143 |
+
|
144 |
+
Response: {response}
|
145 |
+
|
146 |
+
Ensure the response is clear, concise, and fits the context of the column."""
|
147 |
+
|
148 |
+
refined_response = call_perplexity_api(prompt)
|
149 |
+
return refined_response
|
150 |
+
|
151 |
+
def paraphrase(self, text: str) -> Dict:
|
152 |
+
prompt = f"""Paraphrase the following text while:
|
153 |
+
- Maintaining academic tone
|
154 |
+
- Preserving key meaning
|
155 |
+
- Improving clarity
|
156 |
+
|
157 |
+
Text: {text}"""
|
158 |
+
|
159 |
+
response_text = call_perplexity_api(prompt)
|
160 |
+
return {"choices": [{"message": {"content": response_text}}]}
|
161 |
+
|
162 |
+
def generate_citation(self, paper_info: Dict, style: str = "APA") -> Dict:
|
163 |
+
prompt = f"""Generate a {style} citation for:
|
164 |
+
Title: {paper_info['title']}
|
165 |
+
Authors: {', '.join(paper_info['authors'])}
|
166 |
+
Year: {paper_info['year']}
|
167 |
+
|
168 |
+
Follow exact {style} format guidelines."""
|
169 |
+
|
170 |
+
response_text = call_perplexity_api(prompt)
|
171 |
+
return {"citation": response_text}
|
172 |
+
|
173 |
+
def detect_ai_content(self, text: str) -> Dict:
|
174 |
+
prompt = f"""You are an AI content detector. Analyze the text for:
|
175 |
+
1. Writing style consistency
|
176 |
+
2. Language patterns
|
177 |
+
3. Contextual coherence
|
178 |
+
4. Common AI patterns
|
179 |
+
Provide a clear analysis with confidence level.
|
180 |
+
|
181 |
+
Text: {text}"""
|
182 |
+
|
183 |
+
response = requests.post(
|
184 |
+
"https://api.sapling.ai/api/v1/aidetect",
|
185 |
+
json={"key": SAPLING_API_KEY, "text": text},
|
186 |
+
)
|
187 |
+
st.info(
|
188 |
+
"A score from 0 to 1 will be returned, with 0 indicating the maximum confidence that the text is human-written, and 1 indicating the maximum confidence that the text is AI-generated."
|
189 |
+
)
|
190 |
+
|
191 |
+
if response.status_code == 200:
|
192 |
+
return {"choices": [{"message": {"content": response.json()}}]}
|
193 |
+
else:
|
194 |
+
return {
|
195 |
+
"error": f"Sapling API Error: {response.status_code} - {response.text}"
|
196 |
+
}
|
197 |
+
|
198 |
+
def _split_text(self, text: str) -> List[str]:
|
199 |
+
splitter = RecursiveCharacterTextSplitter(
|
200 |
+
chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", ". ", " ", ""]
|
201 |
+
)
|
202 |
+
return splitter.split_text(text)
|
203 |
+
|
204 |
+
def _get_relevant_chunks(self, chunks: List[str], query: str) -> str:
|
205 |
+
# Simple keyword-based relevance scoring
|
206 |
+
query_words = set(query.lower().split())
|
207 |
+
scored_chunks = []
|
208 |
+
|
209 |
+
for chunk in chunks:
|
210 |
+
chunk_words = set(chunk.lower().split())
|
211 |
+
score = len(query_words.intersection(chunk_words))
|
212 |
+
scored_chunks.append((score, chunk))
|
213 |
+
|
214 |
+
scored_chunks.sort(reverse=True)
|
215 |
+
return "\n\n".join(chunk for _, chunk in scored_chunks[:3])
|
216 |
+
|
217 |
+
def _search_arxiv(self, topic: str) -> List[Dict]:
|
218 |
+
try:
|
219 |
+
query = "+AND+".join(topic.split())
|
220 |
+
url = f"http://export.arxiv.org/api/query?search_query=all:{query}&start=0&max_results=5"
|
221 |
+
response = requests.get(url, timeout=10)
|
222 |
+
response.raise_for_status()
|
223 |
+
return self._parse_arxiv_response(response.text)
|
224 |
+
except Exception as e:
|
225 |
+
print(f"arXiv search failed: {str(e)}")
|
226 |
+
return []
|
227 |
+
|
228 |
+
def _parse_arxiv_response(self, response_text: str) -> List[Dict]:
|
229 |
+
try:
|
230 |
+
root = ET.fromstring(response_text)
|
231 |
+
papers = []
|
232 |
+
for entry in root.findall("{http://www.w3.org/2005/Atom}entry"):
|
233 |
+
paper = {
|
234 |
+
"id": entry.find("{http://www.w3.org/2005/Atom}id").text,
|
235 |
+
"title": entry.find(
|
236 |
+
"{http://www.w3.org/2005/Atom}title"
|
237 |
+
).text.strip(),
|
238 |
+
"summary": entry.find(
|
239 |
+
"{http://www.w3.org/2005/Atom}summary"
|
240 |
+
).text.strip(),
|
241 |
+
"authors": [
|
242 |
+
author.find("{http://www.w3.org/2005/Atom}name").text.strip()
|
243 |
+
for author in entry.findall(
|
244 |
+
"{http://www.w3.org/2005/Atom}author"
|
245 |
+
)
|
246 |
+
],
|
247 |
+
"published": entry.find(
|
248 |
+
"{http://www.w3.org/2005/Atom}published"
|
249 |
+
).text[:10],
|
250 |
+
}
|
251 |
+
papers.append(paper)
|
252 |
+
return papers
|
253 |
+
except Exception as e:
|
254 |
+
print(f"arXiv response parsing failed: {str(e)}")
|
255 |
+
return []
|
256 |
+
|
257 |
+
|
258 |
+
def main():
|
259 |
+
# st.set_page_config(page_title="Research Assistant", layout="wide")
|
260 |
+
st.title("Research Copilot")
|
261 |
+
|
262 |
+
if not PERPLEXITY_API_KEY:
|
263 |
+
st.warning("Perplexity API key not found in environment variables.")
|
264 |
+
return
|
265 |
+
|
266 |
+
assistant = ResearchAssistant(PERPLEXITY_API_KEY)
|
267 |
+
|
268 |
+
tabs = st.tabs(
|
269 |
+
[
|
270 |
+
"Chat with PDF",
|
271 |
+
"Literature Review",
|
272 |
+
"AI Writer",
|
273 |
+
"Extract Data",
|
274 |
+
"Paraphraser",
|
275 |
+
"Citation Generator",
|
276 |
+
"AI Detector",
|
277 |
+
]
|
278 |
+
)
|
279 |
+
|
280 |
+
with tabs[0]: # Chat with PDF
|
281 |
+
st.header("Chat with PDF")
|
282 |
+
|
283 |
+
# File uploader with clear button
|
284 |
+
col1, col2 = st.columns([3, 1])
|
285 |
+
with col1:
|
286 |
+
uploaded_file = st.file_uploader("Upload PDF", type="pdf", key="pdf_chat")
|
287 |
+
with col2:
|
288 |
+
if st.button("Clear PDF"):
|
289 |
+
st.session_state.pop("pdf_text", None)
|
290 |
+
st.rerun()
|
291 |
+
|
292 |
+
if uploaded_file:
|
293 |
+
if "pdf_text" not in st.session_state:
|
294 |
+
with st.spinner("Processing PDF..."):
|
295 |
+
reader = PyPDF2.PdfReader(uploaded_file)
|
296 |
+
st.session_state.pdf_text = ""
|
297 |
+
for page in reader.pages:
|
298 |
+
st.session_state.pdf_text += page.extract_text()
|
299 |
+
st.success("PDF processed successfully!")
|
300 |
+
|
301 |
+
query = st.text_input("Ask a question about the PDF")
|
302 |
+
if query:
|
303 |
+
with st.spinner("Analyzing..."):
|
304 |
+
response = assistant.chat_with_pdf(st.session_state.pdf_text, query)
|
305 |
+
if "error" in response:
|
306 |
+
st.error(response["error"])
|
307 |
+
else:
|
308 |
+
st.write(response["choices"][0]["message"]["content"])
|
309 |
+
|
310 |
+
with tabs[1]: # Literature Review
|
311 |
+
st.header("Literature Review")
|
312 |
+
topic = st.text_input("Enter research topic")
|
313 |
+
if st.button("Generate Review") and topic:
|
314 |
+
with st.spinner("Generating literature review..."):
|
315 |
+
review = assistant.generate_literature_review(topic)
|
316 |
+
if "error" in review:
|
317 |
+
st.error(review["error"])
|
318 |
+
else:
|
319 |
+
st.write(review["choices"][0]["message"]["content"])
|
320 |
+
|
321 |
+
with tabs[2]: # AI Writer
|
322 |
+
st.header("AI Writer")
|
323 |
+
outline = st.text_area("Enter paper outline")
|
324 |
+
references = st.text_area("Enter references (one per line)")
|
325 |
+
if st.button("Generate Paper") and outline:
|
326 |
+
with st.spinner("Writing paper..."):
|
327 |
+
paper = assistant.ai_writer(outline, references.split("\n"))
|
328 |
+
if "error" in paper:
|
329 |
+
st.error(paper["error"])
|
330 |
+
else:
|
331 |
+
st.write(paper["choices"][0]["message"]["content"])
|
332 |
+
|
333 |
+
with tabs[3]: # Extract Data
|
334 |
+
st.header("Extract Data")
|
335 |
+
|
336 |
+
uploaded_files = st.file_uploader(
|
337 |
+
"Upload multiple PDF files", type="pdf", accept_multiple_files=True
|
338 |
+
)
|
339 |
+
|
340 |
+
if uploaded_files:
|
341 |
+
if st.button("Process Papers"):
|
342 |
+
# Initialize progress bar
|
343 |
+
progress_bar = st.progress(0)
|
344 |
+
status_text = st.empty()
|
345 |
+
|
346 |
+
# Initialize results dictionary
|
347 |
+
results = []
|
348 |
+
|
349 |
+
# Define categories
|
350 |
+
categories = [
|
351 |
+
"Summarized Abstract",
|
352 |
+
"Results",
|
353 |
+
"Summarized Introduction",
|
354 |
+
"Methods Used",
|
355 |
+
"Literature Survey",
|
356 |
+
"Limitations",
|
357 |
+
"Contributions",
|
358 |
+
"Practical Implications",
|
359 |
+
"Objectives",
|
360 |
+
"Findings",
|
361 |
+
"Future Research",
|
362 |
+
"Dependent Variables",
|
363 |
+
"Independent Variables",
|
364 |
+
"Dataset",
|
365 |
+
"Problem Statement",
|
366 |
+
"Challenges",
|
367 |
+
"Applications",
|
368 |
+
]
|
369 |
+
|
370 |
+
# Process each file
|
371 |
+
for i, file in enumerate(uploaded_files):
|
372 |
+
status_text.text(f"Processing {file.name}...")
|
373 |
+
|
374 |
+
# Extract text from PDF
|
375 |
+
text = extract_text_from_pdf(file)
|
376 |
+
|
377 |
+
# Initialize paper results
|
378 |
+
paper_results = {"Filename": file.name}
|
379 |
+
|
380 |
+
# Analyze each category
|
381 |
+
for j, category in enumerate(categories):
|
382 |
+
status_text.text(f"Processing {file.name} - {category}")
|
383 |
+
paper_results[category] = analyze_paper(text, category)
|
384 |
+
|
385 |
+
# Update progress
|
386 |
+
progress = (i * len(categories) + j + 1) / (
|
387 |
+
len(uploaded_files) * len(categories)
|
388 |
+
)
|
389 |
+
progress_bar.progress(progress)
|
390 |
+
|
391 |
+
# Add small delay to avoid API rate limits
|
392 |
+
time.sleep(1)
|
393 |
+
|
394 |
+
results.append(paper_results)
|
395 |
+
|
396 |
+
# Create DataFrame
|
397 |
+
df = pd.DataFrame(results)
|
398 |
+
|
399 |
+
# Convert DataFrame to CSV
|
400 |
+
csv = df.to_csv(index=False)
|
401 |
+
|
402 |
+
# Create download button
|
403 |
+
st.download_button(
|
404 |
+
label="Download Results as CSV",
|
405 |
+
data=csv,
|
406 |
+
file_name="research_papers_analysis.csv",
|
407 |
+
mime="text/csv",
|
408 |
+
)
|
409 |
+
|
410 |
+
# Display results in the app
|
411 |
+
st.subheader("Analysis Results")
|
412 |
+
st.dataframe(df)
|
413 |
+
|
414 |
+
status_text.text("Processing complete!")
|
415 |
+
progress_bar.progress(1.0)
|
416 |
+
|
417 |
+
with tabs[4]: # Paraphraser
|
418 |
+
st.header("Paraphraser")
|
419 |
+
text = st.text_area("Enter text to paraphrase")
|
420 |
+
if st.button("Paraphrase") and text:
|
421 |
+
with st.spinner("Paraphrasing..."):
|
422 |
+
result = assistant.paraphrase(text)
|
423 |
+
if "error" in result:
|
424 |
+
st.error(result["error"])
|
425 |
+
else:
|
426 |
+
st.write(result["choices"][0]["message"]["content"])
|
427 |
+
|
428 |
+
with tabs[5]: # Citation Generator
|
429 |
+
st.header("Citation Generator")
|
430 |
+
col1, col2 = st.columns(2)
|
431 |
+
with col1:
|
432 |
+
title = st.text_input("Paper Title")
|
433 |
+
authors = st.text_input("Authors (comma-separated)")
|
434 |
+
with col2:
|
435 |
+
year = st.text_input("Year")
|
436 |
+
style = st.selectbox("Citation Style", ["APA", "MLA", "Chicago"])
|
437 |
+
|
438 |
+
if st.button("Generate Citation") and title:
|
439 |
+
with st.spinner("Generating citation..."):
|
440 |
+
citation = assistant.generate_citation(
|
441 |
+
{
|
442 |
+
"title": title,
|
443 |
+
"authors": [a.strip() for a in authors.split(",")],
|
444 |
+
"year": year,
|
445 |
+
},
|
446 |
+
style,
|
447 |
+
)
|
448 |
+
if "error" in citation:
|
449 |
+
st.error(citation["error"])
|
450 |
+
else:
|
451 |
+
st.code(citation["citation"], language="text")
|
452 |
+
|
453 |
+
with tabs[6]: # AI Detector
|
454 |
+
st.header("AI Detector")
|
455 |
+
text = st.text_area("Enter text to analyze")
|
456 |
+
if st.button("Detect AI Content") and text:
|
457 |
+
with st.spinner("Analyzing..."):
|
458 |
+
result = assistant.detect_ai_content(text)
|
459 |
+
if "error" in result:
|
460 |
+
st.error(result["error"])
|
461 |
+
else:
|
462 |
+
st.write(result["choices"][0]["message"]["content"])
|
463 |
+
|
464 |
+
|
465 |
+
if __name__ == "__main__":
|
466 |
+
main()
|
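A self-contained sketch of the keyword-overlap ranking used by ResearchAssistant._get_relevant_chunks above; the chunks and query are invented examples.

def rank_chunks(chunks, query, top_k=3):
    # Score each chunk by how many query words it shares, highest score first.
    query_words = set(query.lower().split())
    scored = sorted(
        ((len(query_words & set(chunk.lower().split())), chunk) for chunk in chunks),
        key=lambda pair: pair[0],
        reverse=True,
    )
    return [chunk for _, chunk in scored[:top_k]]

chunks = [
    "Attention lets transformers weigh tokens by relevance.",
    "Convolutions slide fixed kernels over the input.",
    "Self-attention cost grows quadratically with sequence length.",
]
print(rank_chunks(chunks, "how does attention weigh tokens", top_k=2))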
session_page.py
ADDED
The diff for this file is too large to render.
|
|
ui.py
ADDED
@@ -0,0 +1,111 @@
1 |
+
import streamlit as st
|
2 |
+
from streamlit_option_menu import option_menu
|
3 |
+
|
4 |
+
|
5 |
+
# Page Configuration
|
6 |
+
st.set_page_config(page_title="Enhanced Navigation Demo", layout="wide")
|
7 |
+
|
8 |
+
# Top Navigation Bar using option_menu
|
9 |
+
selected = option_menu(
|
10 |
+
menu_title=None,
|
11 |
+
options=["Home", "Documentation", "Examples", "Community", "About"],
|
12 |
+
icons=["house", "book", "code", "people", "info-circle"],
|
13 |
+
menu_icon="cast",
|
14 |
+
default_index=0,
|
15 |
+
orientation="horizontal",
|
16 |
+
styles={
|
17 |
+
"container": {"padding": "0!important", "background-color": "#fafafa"},
|
18 |
+
"icon": {"color": "orange", "font-size": "25px"},
|
19 |
+
"nav-link": {
|
20 |
+
"font-size": "15px",
|
21 |
+
"text-align": "center",
|
22 |
+
"margin":"0px",
|
23 |
+
"--hover-color": "#eee",
|
24 |
+
},
|
25 |
+
"nav-link-selected": {"background-color": "#0083B8"},
|
26 |
+
}
|
27 |
+
)
|
28 |
+
|
29 |
+
# Sidebar Navigation
|
30 |
+
with st.sidebar:
|
31 |
+
st.header("Navigation Menu")
|
32 |
+
|
33 |
+
# Main Menu Items
|
34 |
+
selected_side = option_menu(
|
35 |
+
menu_title="Go to",
|
36 |
+
options=["Dashboard", "Analytics", "Reports", "Settings"],
|
37 |
+
icons=["speedometer2", "graph-up", "file-text", "gear"],
|
38 |
+
menu_icon="list",
|
39 |
+
default_index=0,
|
40 |
+
)
|
41 |
+
|
42 |
+
# Expandable Reports Section
|
43 |
+
if selected_side == "Reports":
|
44 |
+
with st.expander("Reports", expanded=True):
|
45 |
+
st.button("Weekly Report")
|
46 |
+
st.button("Monthly Report")
|
47 |
+
st.button("Annual Report")
|
48 |
+
|
49 |
+
# Main Content Area based on top navigation
|
50 |
+
if selected == "Home":
|
51 |
+
st.title("Welcome to Home")
|
52 |
+
st.write("This is the home page content.")
|
53 |
+
|
54 |
+
# Dashboard Content
|
55 |
+
st.header("Dashboard")
|
56 |
+
col1, col2, col3 = st.columns(3)
|
57 |
+
with col1:
|
58 |
+
st.metric("Sales", "$12,345", "+2.5%")
|
59 |
+
with col2:
|
60 |
+
st.metric("Users", "1,234", "-8%")
|
61 |
+
with col3:
|
62 |
+
st.metric("Conversion", "3.2%", "+1.2%")
|
63 |
+
|
64 |
+
elif selected == "Documentation":
|
65 |
+
st.title("Documentation")
|
66 |
+
st.write("Documentation content goes here.")
|
67 |
+
|
68 |
+
elif selected == "Examples":
|
69 |
+
st.title("Examples")
|
70 |
+
st.write("Example content goes here.")
|
71 |
+
|
72 |
+
elif selected == "Community":
|
73 |
+
st.title("Community")
|
74 |
+
st.write("Community content goes here.")
|
75 |
+
|
76 |
+
elif selected == "About":
|
77 |
+
st.title("About")
|
78 |
+
st.write("About content goes here.")
|
79 |
+
|
80 |
+
# Content based on sidebar selection
|
81 |
+
if selected_side == "Analytics":
|
82 |
+
st.header("Analytics")
|
83 |
+
st.line_chart({"data": [1, 5, 2, 6, 2, 1]})
|
84 |
+
elif selected_side == "Settings":
|
85 |
+
st.header("Settings")
|
86 |
+
st.toggle("Dark Mode")
|
87 |
+
st.toggle("Notifications")
|
88 |
+
st.slider("Volume", 0, 100, 50)
|
89 |
+
|
90 |
+
# Footer
|
91 |
+
st.markdown(
|
92 |
+
"""
|
93 |
+
<style>
|
94 |
+
.footer {
|
95 |
+
position: fixed;
|
96 |
+
left: 0;
|
97 |
+
bottom: 0;
|
98 |
+
width: 100%;
|
99 |
+
background-color: #0E1117;
|
100 |
+
color: white;
|
101 |
+
text-align: center;
|
102 |
+
padding: 10px;
|
103 |
+
font-size: 14px;
|
104 |
+
}
|
105 |
+
</style>
|
106 |
+
<div class='footer'>
|
107 |
+
© 2024 Your App Name • Privacy Policy • Terms of Service
|
108 |
+
</div>
|
109 |
+
""",
|
110 |
+
unsafe_allow_html=True
|
111 |
+
)
|
utils/helpers.py
ADDED
@@ -0,0 +1,83 @@
1 |
+
from datetime import datetime, timedelta
|
2 |
+
import streamlit as st
|
3 |
+
|
4 |
+
def format_datetime(dt):
|
5 |
+
"""Format datetime for display"""
|
6 |
+
return dt.strftime("%Y-%m-%d %H:%M")
|
7 |
+
|
8 |
+
def get_session_progress(username, course_id, session_id):
|
9 |
+
"""
|
10 |
+
Get user's progress for a specific session
|
11 |
+
Returns dict with pre_class, in_class, and post_class completion status
|
12 |
+
"""
|
13 |
+
# Demo implementation - replace with actual database queries
|
14 |
+
return {
|
15 |
+
'pre_class': {
|
16 |
+
'completed': True,
|
17 |
+
'last_access': datetime.now() - timedelta(days=1),
|
18 |
+
'resources_viewed': 3,
|
19 |
+
'total_resources': 3
|
20 |
+
},
|
21 |
+
'in_class': {
|
22 |
+
'completed': False,
|
23 |
+
'attendance': True,
|
24 |
+
'quiz_completed': False,
|
25 |
+
'questions_asked': 5
|
26 |
+
},
|
27 |
+
'post_class': {
|
28 |
+
'completed': False,
|
29 |
+
'assignments_submitted': 1,
|
30 |
+
'total_assignments': 2,
|
31 |
+
'grade': None
|
32 |
+
}
|
33 |
+
}
|
34 |
+
|
35 |
+
def get_course_sessions(course_id):
|
36 |
+
"""Get all sessions for a course"""
|
37 |
+
# Demo implementation - replace with database query
|
38 |
+
return [
|
39 |
+
{
|
40 |
+
'id': i + 3,
|
41 |
+
'title': 'Introduction to Programming Concepts',
|
42 |
+
'date': datetime.now() + timedelta(days=i),
|
43 |
+
'status': 'completed' if i < 0 else 'upcoming'
|
44 |
+
}
|
45 |
+
for i in range(-2, 5)
|
46 |
+
]
|
47 |
+
|
48 |
+
def display_progress_bar(completed, total, text=""):
|
49 |
+
"""Display a progress bar with text"""
|
50 |
+
progress = completed / total if total > 0 else 0
|
51 |
+
st.progress(progress)
|
52 |
+
st.text(f"{text}: {completed}/{total} ({progress*100:.1f}%)")
|
53 |
+
|
54 |
+
def create_notification(message, type="info"):
|
55 |
+
"""Create a notification message"""
|
56 |
+
if type == "success":
|
57 |
+
st.success(message)
|
58 |
+
elif type == "error":
|
59 |
+
st.error(message)
|
60 |
+
elif type == "warning":
|
61 |
+
st.warning(message)
|
62 |
+
else:
|
63 |
+
st.info(message)
|
64 |
+
|
65 |
+
class SessionManager:
|
66 |
+
"""Manage session state and navigation"""
|
67 |
+
@staticmethod
|
68 |
+
def get_current_session():
|
69 |
+
"""Get current session information"""
|
70 |
+
if 'current_session' not in st.session_state:
|
71 |
+
st.session_state.current_session = 1
|
72 |
+
return st.session_state.current_session
|
73 |
+
|
74 |
+
@staticmethod
|
75 |
+
def set_current_session(session_id):
|
76 |
+
"""Set current session"""
|
77 |
+
st.session_state.current_session = session_id
|
78 |
+
|
79 |
+
@staticmethod
|
80 |
+
def clear_session():
|
81 |
+
"""Clear session state"""
|
82 |
+
for key in list(st.session_state.keys()):
|
83 |
+
del st.session_state[key]
|
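An illustrative consumer of get_session_progress above; the identifiers are placeholders and the numbers printed are the demo values hard-coded in this file. Assumes the utils/ package layout shown in this commit.

from utils.helpers import get_session_progress

progress = get_session_progress("student1", "CS101", 1)  # placeholder identifiers
viewed = progress['pre_class']['resources_viewed']
total = progress['pre_class']['total_resources']
print(f"Pre-class: {viewed}/{total} resources viewed")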
utils/sample_data.py
ADDED
@@ -0,0 +1,226 @@
1 |
+
from datetime import datetime, timedelta
|
2 |
+
|
3 |
+
SAMPLE_COURSES = [
|
4 |
+
{
|
5 |
+
'course_id': 'CS101',
|
6 |
+
'title': 'Introduction to Computer Science',
|
7 |
+
'description': 'This course covers the basics of computer science and programming.',
|
8 |
+
'instructor': 'Dr. John Doe',
|
9 |
+
'duration': '10 weeks'
|
10 |
+
},
|
11 |
+
{
|
12 |
+
'course_id': 'CS102',
|
13 |
+
'title': 'Data Structures and Algorithms',
|
14 |
+
'description': 'This course introduces data structures and algorithms for efficient data processing.',
|
15 |
+
'instructor': 'Dr. Jane Smith',
|
16 |
+
'duration': '12 weeks'
|
17 |
+
},
|
18 |
+
{
|
19 |
+
'course_id': 'CS103',
|
20 |
+
'title': 'Advanced Python Programming',
|
21 |
+
'description': 'This course covers advanced topics in Python programming, including file handling and exception management.',
|
22 |
+
'instructor': 'Dr. Emily Johnson',
|
23 |
+
'duration': '8 weeks'
|
24 |
+
}
|
25 |
+
]
|
26 |
+
|
27 |
+
SAMPLE_SESSIONS = [
|
28 |
+
{
|
29 |
+
'id': 1,
|
30 |
+
'course_id': 'CS101',
|
31 |
+
'title': 'Introduction to Programming Fundamentals',
|
32 |
+
'date': datetime.now() - timedelta(days=7),
|
33 |
+
'status': 'completed',
|
34 |
+
'pre_class': {
|
35 |
+
'resources': [
|
36 |
+
{'type': 'pdf', 'title': 'Introduction to Python Basics', 'url': '/assets/python_basics.pdf'},
|
37 |
+
{'type': 'video', 'title': 'Programming Fundamentals', 'duration': '15:00'},
|
38 |
+
{'type': 'reading', 'title': 'Chapter 1: Getting Started', 'pages': '1-15'}
|
39 |
+
],
|
40 |
+
'completion_required': True
|
41 |
+
},
|
42 |
+
'in_class': {
|
43 |
+
'topics': ['Variables', 'Data Types', 'Basic Operations'],
|
44 |
+
'quiz': {
|
45 |
+
'title': 'Python Basics Quiz',
|
46 |
+
'questions': 5,
|
47 |
+
'duration': 15
|
48 |
+
},
|
49 |
+
'polls': [
|
50 |
+
{'question': 'How comfortable are you with Python syntax?', 'options': ['Very', 'Somewhat', 'Not at all']}
|
51 |
+
]
|
52 |
+
},
|
53 |
+
'post_class': {
|
54 |
+
'assignments': [
|
55 |
+
{
|
56 |
+
'id': 1,
|
57 |
+
'title': 'Basic Python Programs',
|
58 |
+
'due_date': datetime.now() + timedelta(days=2),
|
59 |
+
'status': 'pending'
|
60 |
+
}
|
61 |
+
]
|
62 |
+
}
|
63 |
+
},
|
64 |
+
{
|
65 |
+
'id': 2,
|
66 |
+
'course_id': 'CS101',
|
67 |
+
'title': 'Control Flow and Functions',
|
68 |
+
'date': datetime.now() - timedelta(days=3),
|
69 |
+
'status': 'completed',
|
70 |
+
'pre_class': {
|
71 |
+
'resources': [
|
72 |
+
{'type': 'pdf', 'title': 'Control Flow in Python', 'url': '/assets/control_flow.pdf'},
|
73 |
+
{'type': 'video', 'title': 'Functions and Methods', 'duration': '20:00'}
|
74 |
+
],
|
75 |
+
'completion_required': True
|
76 |
+
},
|
77 |
+
'in_class': {
|
78 |
+
'topics': ['If-else statements', 'Loops', 'Function definitions'],
|
79 |
+
'quiz': {
|
80 |
+
'title': 'Control Flow Quiz',
|
81 |
+
'questions': 8,
|
82 |
+
'duration': 20
|
83 |
+
},
|
84 |
+
'polls': [
|
85 |
+
{'question': 'Which loop type do you find more intuitive?', 'options': ['For loops', 'While loops', 'Both']}
|
86 |
+
]
|
87 |
+
},
|
88 |
+
'post_class': {
|
89 |
+
'assignments': [
|
90 |
+
{
|
91 |
+
'id': 2,
|
92 |
+
'title': 'Function Implementation Exercise',
|
93 |
+
'due_date': datetime.now() + timedelta(days=4),
|
94 |
+
'status': 'pending'
|
95 |
+
}
|
96 |
+
]
|
97 |
+
}
|
98 |
+
},
|
99 |
+
{
|
100 |
+
'id': 3,
|
101 |
+
'course_id': 'CS102',
|
102 |
+
'title': 'Data Structures',
|
103 |
+
'date': datetime.now(),
|
104 |
+
'status': 'in_progress',
|
105 |
+
'pre_class': {
|
106 |
+
'resources': [
|
107 |
+
{'type': 'pdf', 'title': 'Python Data Structures', 'url': '/assets/data_structures.pdf'},
|
108 |
+
{'type': 'video', 'title': 'Lists and Dictionaries', 'duration': '25:00'}
|
109 |
+
],
|
110 |
+
'completion_required': True
|
111 |
+
},
|
112 |
+
'in_class': {
|
113 |
+
'topics': ['Lists', 'Tuples', 'Dictionaries', 'Sets'],
|
114 |
+
'quiz': {
|
115 |
+
'title': 'Data Structures Quiz',
|
116 |
+
'questions': 10,
|
117 |
+
'duration': 25
|
118 |
+
},
|
119 |
+
'polls': [
|
120 |
+
{'question': 'Which data structure do you use most often?', 'options': ['Lists', 'Dictionaries', 'Sets', 'Tuples']}
|
121 |
+
]
|
122 |
+
},
|
123 |
+
'post_class': {
|
124 |
+
'assignments': [
|
125 |
+
{
|
126 |
+
'id': 3,
|
127 |
+
'title': 'Data Structure Implementation',
|
128 |
+
'due_date': datetime.now() + timedelta(days=7),
|
129 |
+
'status': 'not_started'
|
130 |
+
}
|
131 |
+
]
|
132 |
+
}
|
133 |
+
},
|
134 |
+
{
|
135 |
+
'id': 4,
|
136 |
+
'course_id': 'CS101',
|
137 |
+
'title': 'Object-Oriented Programming',
|
138 |
+
'date': datetime.now() + timedelta(days=4),
|
139 |
+
'status': 'upcoming',
|
140 |
+
'pre_class': {
|
141 |
+
'resources': [
|
142 |
+
{'type': 'pdf', 'title': 'OOP Concepts', 'url': '/assets/oop_concepts.pdf'},
|
143 |
+
{'type': 'video', 'title': 'Classes and Objects', 'duration': '30:00'}
|
144 |
+
],
|
145 |
+
'completion_required': True
|
146 |
+
},
|
147 |
+
'in_class': {
|
148 |
+
'topics': ['Classes', 'Objects', 'Inheritance', 'Polymorphism'],
|
149 |
+
'quiz': {
|
150 |
+
'title': 'OOP Concepts Quiz',
|
151 |
+
'questions': 12,
|
152 |
+
'duration': 30
|
153 |
+
},
|
154 |
+
'polls': [
|
155 |
+
{'question': 'Have you used OOP before?', 'options': ['Yes', 'No', 'Not sure'], 'responses': {'Yes': 12, 'No': 8, 'Not sure': 10}}
|
156 |
+
]
|
157 |
+
},
|
158 |
+
'post_class': {
|
159 |
+
'assignments': [
|
160 |
+
{
|
161 |
+
'id': 4,
|
162 |
+
'title': 'Class Implementation Project',
|
163 |
+
'due_date': datetime.now() + timedelta(days=11),
|
164 |
+
'status': 'not_started'
|
165 |
+
}
|
166 |
+
]
|
167 |
+
}
|
168 |
+
},
|
169 |
+
{
|
170 |
+
'id': 5,
|
171 |
+
'course_id': 'CS103',
|
172 |
+
'title': 'File Handling and Exception Management',
|
173 |
+
'date': datetime.now() + timedelta(days=7),
|
174 |
+
'status': 'upcoming',
|
175 |
+
'pre_class': {
|
176 |
+
'resources': [
|
177 |
+
{'type': 'pdf', 'title': 'File Operations in Python', 'url': '/assets/file_ops.pdf'},
|
178 |
+
{'type': 'video', 'title': 'Exception Handling', 'duration': '20:00'}
|
179 |
+
],
|
180 |
+
'completion_required': True
|
181 |
+
},
|
182 |
+
'in_class': {
|
183 |
+
'topics': ['File Operations', 'Exception Handling', 'Context Managers'],
|
184 |
+
'quiz': {
|
185 |
+
'title': 'File Operations Quiz',
|
186 |
+
'questions': 8,
|
187 |
+
'duration': 20
|
188 |
+
},
|
189 |
+
'polls': [
|
190 |
+
{'question': 'How often do you handle exceptions in your code?',
|
191 |
+
'options': ['Always', 'Sometimes', 'Rarely', 'Never'],
|
192 |
+
'responses': {'Always': 10, 'Sometimes': 15, 'Rarely': 5}
|
193 |
+
}
|
194 |
+
]
|
195 |
+
},
|
196 |
+
'post_class': {
|
197 |
+
'assignments': [
|
198 |
+
{
|
199 |
+
'id': 5,
|
200 |
+
'title': 'File Processing Application',
|
201 |
+
'due_date': datetime.now() + timedelta(days=14),
|
202 |
+
'status': 'not_started'
|
203 |
+
}
|
204 |
+
]
|
205 |
+
}
|
206 |
+
}
|
207 |
+
]
|
208 |
+
|
209 |
+
# Chatbot message history
|
210 |
+
SAMPLE_CHAT_HISTORY = {
|
211 |
+
1: [
|
212 |
+
{'user': 'student1', 'message': 'What is the difference between list and tuple?', 'timestamp': datetime.now()},
|
213 |
+
{'user': 'chatbot', 'message': 'Lists are mutable (can be modified) while tuples are immutable (cannot be modified after creation).', 'timestamp': datetime.now()}
|
214 |
+
]
|
215 |
+
}
|
216 |
+
|
217 |
+
# Student progress data
|
218 |
+
SAMPLE_STUDENT_PROGRESS = {
|
219 |
+
'user1': {
|
220 |
+
1: {'pre_class': 50, 'in_class': 80, 'post_class': 90},
|
221 |
+
2: {'pre_class': 100, 'in_class': 75, 'post_class': 85},
|
222 |
+
3: {'pre_class': 50, 'in_class': 0, 'post_class': 0},
|
223 |
+
4: {'pre_class': 0, 'in_class': 0, 'post_class': 0},
|
224 |
+
5: {'pre_class': 0, 'in_class': 0, 'post_class': 0}
|
225 |
+
}
|
226 |
+
}
|
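Illustrative only: how a dashboard page might filter the sample data above; assumes the module is importable as utils.sample_data, matching the path in this commit.

from utils.sample_data import SAMPLE_SESSIONS, SAMPLE_STUDENT_PROGRESS

cs101_sessions = [s for s in SAMPLE_SESSIONS if s['course_id'] == 'CS101']
print([s['title'] for s in cs101_sessions])
print(SAMPLE_STUDENT_PROGRESS['user1'][1])  # demo progress for session id 1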