YashJD committed
Commit e107ee4 · 1 Parent(s): f885955

Initial Commit

.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ # Ignore .env file
2
+ .env
3
+ __pycache__/
4
+ newenv
5
+ backupgoal.py
6
+ backupgoal2.py
7
+ backupresearch.py
8
+ goals.py
9
+ goals3.py
10
+ research_assistant_dashboard2.py
11
+ tempCodeRunnerFile.py
12
+ all_chat_histories.json
13
+ all_chat_histories2.json
14
+ analytics.ipynb
15
+ chat_history.csv
16
+ harshal.py
17
+ course_creation.py
18
+ topics.json
19
+ new_analytics.json
20
+ new_analytics2.json
21
+ pre_class_analytics.py
22
+ sample_files/
README.md CHANGED
@@ -1,13 +1,11 @@
1
  ---
2
- title: FlipClass RCopilot
3
- emoji: 📉
4
- colorFrom: red
5
- colorTo: indigo
6
  sdk: streamlit
7
  sdk_version: 1.41.1
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: NovaScholar
3
+ emoji: 🐢
4
+ colorFrom: purple
5
+ colorTo: red
6
  sdk: streamlit
7
  sdk_version: 1.41.1
8
  app_file: app.py
9
  pinned: false
10
+ short_description: Generative-AI powered Flipped Classroom Learning Platform
11
  ---
 
 
Research Paper Attributes.txt ADDED
@@ -0,0 +1,98 @@
1
+ Review Based Paper
2
+ Title TEXT,
3
+ Publication TEXT,
4
+ Journal_Conference TEXT,
5
+ Abstract TEXT,
6
+ Keywords TEXT,
7
+ Author TEXT,
8
+ Date_of_Publication TEXT,
9
+ Intro TEXT,
10
+ Literature_Review TEXT,
11
+ Body: TEXT
12
+ Protocol: TEXT
13
+ Search String: TEXT
14
+ Included Studies: TEXT
15
+ Data Collection and Analysis Methods: TEXT
16
+ Data Extraction Table: TEXT
17
+ Synthesis and Analysis: TEXT
18
+ Conclusion TEXT,
19
+ Limitations TEXT,
20
+ Results TEXT,
21
+ References TEXT
22
+
23
+ Risk of Bias Assessment: TEXT
+
+ Opinion/Perspective Based Paper
24
+ Title TEXT,
25
+ Publication TEXT,
26
+ Journal_Conference TEXT,
27
+ Abstract TEXT,
28
+ Keywords TEXT,
29
+ Author TEXT,
30
+ Date_of_Publication TEXT,
31
+ Intro TEXT,
32
+ Literature_Review TEXT
33
+ Introduction: TEXT
34
+ Body: TEXT
35
+ Results and Discussion:TEXT
36
+ Conclusion: TEXT
37
+ References: TEXT
38
+
39
+
40
+
41
+
42
+
43
+
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
+
53
+
54
+ Empirical Research Paper
55
+ Title TEXT,
56
+ Publication TEXT,
57
+ Journal_Conference TEXT,
58
+ Abstract TEXT,
59
+ Keywords TEXT,
60
+ Author TEXT,
61
+ Date_of_Publication TEXT,
62
+ Intro TEXT,
63
+ Literature_Review TEXT
64
+ Introduction: TEXT
65
+ Body: TEXT
66
+ Methodology: TEXT
67
+ Participants: TEXT - Describes the sample and the sampling methods used.
68
+ Survey Instrument: TEXT - Describes the design and development of the survey questionnaire.
69
+ Data Collection: TEXT - Explains how the survey data was collected.
70
+ Data Analysis: TEXT - Details the statistical techniques used to analyze the data.
71
+
72
+
73
+ Results and Discussion:TEXT
74
+ Conclusion: TEXT
75
+ References: TEXT
76
+ Research Paper (Other)
77
+ Title TEXT,
78
+ Publication TEXT,
79
+ Journal_Conference TEXT,
80
+ Abstract TEXT,
81
+ Keywords TEXT,
82
+ Author TEXT,
83
+ Date_of_Publication TEXT,
84
+ Intro TEXT,
85
+ Literature_Review TEXT,
86
+ Research_Models_Used TEXT,
87
+ Methodology TEXT,
88
+ Discussion TEXT,
89
+ Future_Scope TEXT,
90
+ Theory TEXT,
91
+ Independent_Variables TEXT,
92
+ nof_Independent_Variables INTEGER,
93
+ Dependent_Variables TEXT,
94
+ nof_Dependent_Variables INTEGER,
95
+ Control_Variables TEXT,
96
+ Extraneous_Variables TEXT,
97
+ nof_Control_Variables INTEGER,
98
+ nof_Extraneous_Variables INTEGER
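The attribute lists above read like column definitions (TEXT / INTEGER) for four paper types. Since the rest of this commit persists data in MongoDB via pymongo, below is a minimal sketch of how one such record could be stored; the `papers` collection name and all placeholder values are illustrative assumptions, not part of this commit.

```python
# Illustrative sketch only: one "Research Paper (Other)" record stored in MongoDB,
# using the attribute names listed above. The "papers" collection name and the
# placeholder values are assumptions for illustration.
import os
from datetime import datetime
from pymongo import MongoClient

client = MongoClient(os.getenv("MONGO_URI"))
db = client["digital_nova"]        # database name used elsewhere in this commit
papers_collection = db["papers"]   # assumed collection name

paper_doc = {
    "paper_type": "Research Paper (Other)",
    "Title": "An example paper title",
    "Publication": "Example Press",
    "Journal_Conference": "Example Conference 2024",
    "Abstract": "Short abstract text.",
    "Keywords": "flipped classroom, generative AI",
    "Author": "Doe, J.",
    "Date_of_Publication": "2024-01-15",
    "Intro": "Introduction text.",
    "Literature_Review": "Literature review text.",
    "Research_Models_Used": "Technology Acceptance Model",
    "Methodology": "Survey-based study.",
    "Discussion": "Discussion text.",
    "Future_Scope": "Future work text.",
    "Theory": "Underlying theory.",
    "Independent_Variables": "perceived usefulness, perceived ease of use",
    "nof_Independent_Variables": 2,   # INTEGER attributes stay numeric
    "Dependent_Variables": "intention to use",
    "nof_Dependent_Variables": 1,
    "Control_Variables": "age",
    "Extraneous_Variables": "",
    "nof_Control_Variables": 1,
    "nof_Extraneous_Variables": 0,
    "created_at": datetime.utcnow(),
}

papers_collection.insert_one(paper_doc)
```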
analytics.py ADDED
@@ -0,0 +1,97 @@
1
+ import os
2
+ import pandas as pd
3
+ import numpy as np
4
+ from numpy.linalg import norm
5
+ from pymongo import MongoClient
6
+ import openai
7
+ from openai import OpenAI
8
+ import streamlit as st
9
+ from datetime import datetime
10
+
11
+ # MongoDB connection
12
+ MONGO_URI = os.getenv('MONGO_URI')
13
+
14
+ client = MongoClient(MONGO_URI)
15
+ db = client['digital_nova']
16
+ themes_collection = db['themes']
17
+ corpus_collection = db['corpus']
18
+ vectors_collection = db['vectors'] # Reference to 'vectors' collection
19
+ users_collection = db['users']
20
+
21
+ # Function to create embeddings
22
+ def create_embeddings(text, openai_api_key):
23
+ client = OpenAI(api_key=openai_api_key)
24
+ response = client.embeddings.create(
25
+ input=text,
26
+ model="text-embedding-3-small"
27
+ )
28
+ return response.data[0].embedding
29
+
30
+ # Function to calculate cosine similarity
31
+ def cosine_similarity(v1, v2):
32
+ v1 = np.array(v1)
33
+ v2 = np.array(v2)
34
+ dot_product = np.dot(v1, v2)
35
+ norm_product = norm(v1) * norm(v2)
36
+ return dot_product / norm_product if norm_product != 0 else 0
37
+
38
+ def derive_analytics(goal, reference_text, openai_api_key, context=None, synoptic=None):
39
+ """
40
+ Analyze subjective answers with respect to pre-class materials and synoptic, and provide detailed feedback
41
+
42
+ Args:
43
+ goal (str): Analysis objective
44
+ reference_text (str): Student's answer text
45
+ openai_api_key (str): OpenAI API key
46
+ context (str, optional): Pre-class material content for comparison
47
+ synoptic (str, optional): Synoptic content for evaluation
48
+ """
49
+ template = f"""Given a student's answer to a subjective question, analyze it following these specific guidelines. Compare it with the provided pre-class materials and synoptic (if available) to assess correctness and completeness.
50
+
51
+ 1. Analyze the text as an experienced educational assessor, considering:
52
+ - Conceptual understanding
53
+ - Factual accuracy
54
+ - Completeness of response
55
+ - Use of relevant terminology
56
+ - Application of concepts
57
+
58
+ 2. Structure the output in markdown with two sections:
59
+
60
+ **Correctness Assessment**
61
+ - Rate overall correctness on a scale of 1-10
62
+
63
+ **Evidence-Based Feedback**
64
+ - Provide specific evidence from the student's answer to justify the score reduction
65
+ - Highlight the exact lines or phrases that need improvement
66
+
67
+ Pre-class Materials Context:
68
+ {context if context else "No reference materials provided"}
69
+
70
+ Synoptic:
71
+ {synoptic if synoptic else "No synoptic provided"}
72
+
73
+ Student's Answer:
74
+ {reference_text}
75
+
76
+ Rules:
77
+ - Base assessment strictly on provided content
78
+ - Be specific in feedback and suggestions
79
+ """
80
+
81
+ # Initialize OpenAI client
82
+ client = OpenAI(api_key=openai_api_key)
83
+
84
+ try:
85
+ response = client.chat.completions.create(
86
+ model="gpt-4-0125-preview",
87
+ messages=[
88
+ {"role": "system", "content": "You are an educational assessment expert."},
89
+ {"role": "user", "content": template}
90
+ ],
91
+ temperature=0.7
92
+ )
93
+ analysis = response.choices[0].message.content
94
+ return analysis
95
+ except Exception as e:
96
+ print(f"Error in generating analysis with OpenAI: {str(e)}")
97
+ return "Error generating analysis"
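analytics.py defines `create_embeddings` and `cosine_similarity`, but only `derive_analytics` is fully wired up in this commit. Below is a hedged usage sketch of the two helpers: it embeds a student answer and ranks reference chunks from the `vectors` collection by similarity. The `{"text": ..., "vector": ...}` document shape and the `rank_reference_chunks` helper are assumptions for illustration; the top-ranked text could then be passed to `derive_analytics` as its `context` argument.

```python
# Hedged usage sketch (not part of this commit): combine create_embeddings and
# cosine_similarity to rank stored reference chunks against a student answer.
# Assumes documents in the 'vectors' collection look like {"text": ..., "vector": [...]}.
import os
from analytics import create_embeddings, cosine_similarity, vectors_collection

def rank_reference_chunks(student_answer, openai_api_key, top_k=3):
    """Return the top_k most similar reference chunks for a student answer."""
    query_vector = create_embeddings(student_answer, openai_api_key)
    scored = []
    for doc in vectors_collection.find({}, {"text": 1, "vector": 1}):
        score = cosine_similarity(query_vector, doc["vector"])
        scored.append((score, doc.get("text", "")))
    scored.sort(key=lambda pair: pair[0], reverse=True)  # highest similarity first
    return scored[:top_k]

if __name__ == "__main__":
    top_matches = rank_reference_chunks(
        "Photosynthesis converts light energy into chemical energy.",
        os.getenv("OPENAI_KEY"),
    )
    for score, text in top_matches:
        print(f"{score:.3f}  {text[:80]}")
```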
app.py ADDED
@@ -0,0 +1,1424 @@
1
+ import re
2
+ import streamlit as st
3
+ from datetime import datetime, date, time, timedelta
4
+ from pathlib import Path
5
+ from utils.sample_data import SAMPLE_COURSES, SAMPLE_SESSIONS
6
+ from session_page import display_session_content
7
+ from db import (
8
+ courses_collection2,
9
+ faculty_collection,
10
+ students_collection,
11
+ research_assistants_collection,
12
+ analysts_collection,
13
+ )
14
+ from werkzeug.security import generate_password_hash, check_password_hash
15
+ import os
16
+ from openai import OpenAI
17
+ from dotenv import load_dotenv
18
+ from create_course2 import create_course, courses_collection, generate_perplexity_response, generate_session_resources, PERPLEXITY_API_KEY, validate_course_plan
19
+ import json
20
+ from bson import ObjectId
21
+ load_dotenv()
22
+ client = OpenAI(api_key=os.getenv("OPENAI_KEY"))
23
+
24
+
25
+ # PERPLEXITY_API_KEY = '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'
26
+
27
+ def get_research_papers(query):
28
+ """Get research paper recommendations based on query"""
29
+ try:
30
+ response = client.chat.completions.create(
31
+ model="gpt-3.5-turbo",
32
+ messages=[
33
+ {
34
+ "role": "system",
35
+ "content": "You are a helpful research assistant. Provide 10 relevant research papers with titles, authors, brief descriptions, and DOI/URL links. Format each paper as: \n\n1. **Title**\nAuthors: [names]\nLink: [DOI/URL]\nDescription: [brief summary]",
36
+ },
37
+ {
38
+ "role": "user",
39
+ "content": f"Give me 10 research papers about: {query}. Include valid DOI links or URLs to the papers where available.",
40
+ },
41
+ ],
42
+ )
43
+ return response.choices[0].message.content
44
+ except Exception as e:
45
+ return f"Error getting recommendations: {str(e)}"
46
+
47
+
48
+ def analyze_research_gaps(papers):
49
+ """Analyze gaps in research based on recommended papers"""
50
+ try:
51
+ response = client.chat.completions.create(
52
+ model="gpt-3.5-turbo",
53
+ messages=[
54
+ {
55
+ "role": "system",
56
+ "content": "You are a research analysis expert. Based on the provided papers, identify potential research gaps and future research directions.",
57
+ },
58
+ {
59
+ "role": "user",
60
+ "content": f"Based on these papers, what are the key areas that need more research?\n\nPapers:\n{papers}",
61
+ },
62
+ ],
63
+ )
64
+ return response.choices[0].message.content
65
+ except Exception as e:
66
+ return f"Error analyzing research gaps: {str(e)}"
67
+
68
+
69
+ def init_session_state():
70
+ """Initialize session state variables"""
71
+ if "authenticated" not in st.session_state:
72
+ st.session_state.authenticated = False
73
+ if "user_id" not in st.session_state:
74
+ st.session_state.user_id = None
75
+ if "user_type" not in st.session_state:
76
+ st.session_state.user_type = None
77
+ if "username" not in st.session_state:
78
+ st.session_state.username = None
79
+ if "selected_course" not in st.session_state:
80
+ st.session_state.selected_course = None
81
+ if "show_create_course_form" not in st.session_state:
82
+ st.session_state.show_create_course_form = False
83
+ if "show_create_session_form" not in st.session_state:
84
+ st.session_state.show_create_session_form = False
85
+ if "show_enroll_course_page" not in st.session_state:
86
+ st.session_state.show_enroll_course_page = False
87
+ if "course_to_enroll" not in st.session_state:
88
+ st.session_state.course_to_enroll = None
89
+
90
+ def login_user(username, password, user_type):
91
+ """Login user based on credentials"""
92
+ if user_type == "student":
93
+ # user = students_collection.find_one({"full_name": username}) or students_collection.find_one({"username": username})
94
+ user = students_collection.find_one({"$or": [{"full_name": username}, {"username": username}]})
95
+ elif user_type == "faculty":
96
+ user = faculty_collection.find_one({"full_name": username})
97
+ elif user_type == "research_assistant":
98
+ user = research_assistants_collection.find_one({"full_name": username})
99
+ elif user_type == "analyst":
100
+ user = analysts_collection.find_one({"full_name": username})
101
+
102
+ if user and check_password_hash(user["password"], password):
103
+ st.session_state.user_id = user["_id"]
104
+ print(st.session_state.user_id)
105
+ st.session_state.authenticated = True
106
+ st.session_state.user_type = user_type
107
+ st.session_state.username = username
108
+ return True
109
+ return False
110
+
111
+ # def login_form():
112
+ # """Display login form"""
113
+ # st.title("Welcome to NOVAScholar")
114
+
115
+ # with st.form("login_form"):
116
+
117
+ # user_type = st.selectbox(
118
+ # "Please select your Role", ["student", "faculty", "research_assistant", "analyst"]
119
+ # )
120
+ # username = st.text_input("Username")
121
+ # password = st.text_input("Password", type="password")
122
+ # submit = st.form_submit_button("Login")
123
+
124
+ # if submit:
125
+ # if login_user(username, password, user_type):
126
+ # st.success("Login successful!")
127
+ # st.rerun()
128
+ # else:
129
+ # st.error("Invalid credentials!")
130
+ def login_form():
131
+ """Display enhanced login form"""
132
+ st.title("Welcome to NOVAScholar")
133
+
134
+ with st.form("login_form"):
135
+ # Role selection at the top
136
+ user_type = st.selectbox(
137
+ "Please select your Role",
138
+ ["student", "faculty", "research_assistant", "analyst"]
139
+ )
140
+
141
+ # Username/email and password stacked vertically
142
+ username = st.text_input("Username or Email")
143
+ password = st.text_input("Password", type="password")
144
+
145
+ # Login button
146
+ submit = st.form_submit_button("Login")
147
+
148
+ if submit:
149
+ # Handle both username and email login
150
+ if '@' in username:
151
+ username = extract_username(username)
152
+
153
+ if login_user(username, password, user_type):
154
+ st.success("Login successful!")
155
+ st.rerun()
156
+ else:
157
+ st.error("Invalid credentials!")
158
+
159
+ def get_courses(username, user_type):
160
+ if user_type == "student":
161
+ student = students_collection.find_one({"$or": [{"full_name": username}, {"username": username}]})
162
+ if student:
163
+ enrolled_course_ids = [
164
+ course["course_id"] for course in student.get("enrolled_courses", [])
165
+ ]
166
+ courses = courses_collection.find(
167
+ {"course_id": {"$in": enrolled_course_ids}}
168
+ )
169
+ # courses += courses_collection2.find(
170
+ # {"course_id": {"$in": enrolled_course_ids}}
171
+ # )
172
+ # # course_titles = [course['title'] for course in courses]
173
+ # return list(courses)
174
+ # courses_cursor1 = courses_collection.find(
175
+ # {"course_id": {"$in": enrolled_course_ids}}
176
+ # )
177
+ # courses_cursor2 = courses_collection2.find(
178
+ # {"course_id": {"$in": enrolled_course_ids}}
179
+ # )
180
+ # courses = list(courses_cursor1) + list(courses_cursor2)
181
+ return list(courses)
182
+ elif user_type == "faculty":
183
+ faculty = faculty_collection.find_one({"full_name": username})
184
+ if faculty:
185
+ course_ids = [
186
+ course["course_id"] for course in faculty.get("courses_taught", [])
187
+ ]
188
+ # courses_1 = list(courses_collection2.find({"course_id": {"$in": course_ids}}))
189
+ courses_2 = list(courses_collection.find({"course_id": {"$in": course_ids}}))
190
+ return courses_2
191
+ elif user_type == "research_assistant":
192
+ research_assistant = research_assistants_collection.find_one(
193
+ {"full_name": username}
194
+ )
195
+ if research_assistant:
196
+ course_ids = [
197
+ course["course_id"]
198
+ for course in research_assistant.get("courses_assisted", [])
199
+ ]
200
+ courses = courses_collection2.find({"course_id": {"$in": course_ids}})
201
+ return list(courses)
202
+ else:
203
+ return []
204
+
205
+
206
+ def get_course_ids():
207
+ """Get course IDs for sample courses"""
208
+ return [course["course_id"] for course in SAMPLE_COURSES]
209
+
210
+
211
+ def get_sessions(course_id, course_title):
212
+ """Get sessions for a given course ID"""
213
+ course = courses_collection.find_one({"course_id": course_id, "title": course_title})
214
+ if course:
215
+ return course.get("sessions", [])
216
+ return []
217
+
218
+
219
+ def create_session(new_session, course_id):
220
+ """Create a new session for a given course ID"""
221
+ course = courses_collection2.find_one({"course_id": course_id}) or courses_collection.find_one({"course_id": course_id})
222
+ if course:
223
+ session_numbers = [int(session["session_id"][1:]) for session in course["sessions"]]
224
+ last_session_id = max(session_numbers)
225
+ new_session_id = last_session_id + 1
226
+ new_session["session_id"] = "S" + str(new_session_id)
227
+ courses_collection2.update_one(
228
+ {"course_id": new_session["course_id"]},
229
+ {"$push": {"sessions": new_session}},
230
+ )
231
+ return True
232
+ return False
233
+
234
+
235
+ def create_session_form(course_id):
236
+ """Display form to create a new session and perform the creation operation"""
237
+ st.title("Create New Session")
238
+
239
+ if 'session_time' not in st.session_state:
240
+ st.session_state.session_time = datetime.now().time()
241
+ if 'show_create_session_form' not in st.session_state:
242
+ st.session_state.show_create_session_form = False
243
+
244
+ with st.form("create_session_form"):
245
+ session_title = st.text_input("Session Title")
246
+ session_date = st.date_input("Session Date", date.today(), key="session_date")
247
+ session_time = st.time_input(
248
+ "Session Time", st.session_state.session_time, key="session_time"
249
+ )
250
+
251
+ new_session_id = None
252
+ # Generate new session ID
253
+ course = courses_collection2.find_one({"course_id": course_id})
254
+ if course and "sessions" in course and course["sessions"]:
255
+ last_session_id = max(
256
+ int(session["session_id"][1:]) for session in course["sessions"]
257
+ )
258
+ new_session_id = last_session_id + 1
259
+ else:
260
+ new_session_id = 1
261
+
262
+ if st.form_submit_button("Create Session"):
263
+ clicked = True
264
+ new_session = {
265
+ "session_id": f"S{new_session_id}",
266
+ "course_id": course_id,
267
+ "title": session_title,
268
+ "date": datetime.combine(session_date, session_time),
269
+ "status": "upcoming",
270
+ "created_at": datetime.utcnow(),
271
+ "pre_class": {
272
+ "resources": [],
273
+ "completion_required": True,
274
+ },
275
+ "in_class": {
276
+ "topics": [],
277
+ "quiz": {"title": "", "questions": 0, "duration": 0},
278
+ "polls": [],
279
+ },
280
+ "post_class": {
281
+ "assignments": [],
282
+ },
283
+ }
284
+ courses_collection2.update_one(
285
+ {"course_id": course_id}, {"$push": {"sessions": new_session}}
286
+ )
287
+ st.success("Session created successfully!")
288
+ st.session_state.show_create_session_form = False
289
+
290
+ # new_session_id = None
291
+ # creation_success = False
292
+ # # Generate new session ID
293
+ # course = courses_collection2.find_one({"course_id": course_id})
294
+ # if course and 'sessions' in course and course['sessions']:
295
+ # last_session_id = max((session['session_id'] for session in course['sessions']))
296
+ # last_session_id = int(last_session_id[1:])
297
+ # new_session_id = last_session_id + 1
298
+ # else:
299
+ # new_session_id = 1
300
+
301
+ # new_session = {
302
+ # "session_id": 'S' + new_session_id,
303
+ # "title": session_title,
304
+ # "date": datetime.datetime.combine(session_date, session_time).isoformat(),
305
+ # "status": "upcoming",
306
+ # "created_at": datetime.datetime.utcnow().isoformat(),
307
+ # "pre_class": {
308
+ # "resources": [],
309
+ # "completetion_required": True,
310
+ # },
311
+ # "in_class": {
312
+ # "topics": [],
313
+ # "quiz":
314
+ # {
315
+ # "title": '',
316
+ # "questions": 0,
317
+ # "duration": 0
318
+ # },
319
+ # "polls": []
320
+ # },
321
+ # "post_class": {
322
+ # "assignments": [],
323
+ # }
324
+ # }
325
+ # courses_collection2.update_one(
326
+ # {"course_id": course_id},
327
+ # {"$push": {"sessions": new_session}}
328
+ # )
329
+ # creation_success = True
330
+ # st.form_submit_button("Create Session")
331
+ # if creation_success == True:
332
+ # st.success("Session created successfully!")
333
+ # else:
334
+
335
+
336
+ def get_new_student_id():
337
+ """Generate a new student ID by incrementing the last student ID"""
338
+ last_student = students_collection.find_one(sort=[("SID", -1)])
339
+ if last_student:
340
+ last_student_id = int(last_student["SID"][1:])
341
+ new_student_id = f"S{last_student_id + 1}"
342
+ else:
343
+ new_student_id = "S101"
344
+ return new_student_id
345
+
346
+
347
+ def get_new_faculty_id():
348
+ """Generate a new faculty ID by incrementing the last faculty ID"""
349
+ last_faculty = faculty_collection.find_one(sort=[("TID", -1)])
350
+ if last_faculty:
351
+ last_faculty_id = int(last_faculty["TID"][1:])
352
+ new_faculty_id = f"T{last_faculty_id + 1}"
353
+ else:
354
+ new_faculty_id = "T101"
355
+ return new_faculty_id
356
+
357
+
358
+ def get_new_course_id():
359
+ """Generate a new course ID by incrementing the last course ID"""
360
+ last_course = courses_collection2.find_one(sort=[("course_id", -1)])
361
+ if last_course:
362
+ last_course_id = int(last_course["course_id"][2:])
363
+ new_course_id = f"CS{last_course_id + 1}"
364
+ else:
365
+ new_course_id = "CS101"
366
+ return new_course_id
367
+
368
+
369
+ # def register_page():
370
+ # st.title("Register")
371
+ # if "user_type" not in st.session_state:
372
+ # st.session_state.user_type = "student"
373
+
374
+ # # Select user type
375
+ # st.session_state.user_type = st.selectbox(
376
+ # "Select User Type", ["student", "faculty", "research_assistant"]
377
+ # )
378
+ # user_type = st.session_state.user_type
379
+ # print(user_type)
380
+
381
+ # with st.form("register_form"):
382
+ # # user_type = st.selectbox("Select User Type", ["student", "faculty", "research_assistant"])
383
+ # # print(user_type)
384
+ # full_name = st.text_input("Full Name")
385
+ # password = st.text_input("Password", type="password")
386
+ # confirm_password = st.text_input("Confirm Password", type="password")
387
+
388
+ # if user_type == "student":
389
+ # # Fetch courses for students to select from
390
+ # courses = list(courses_collection2.find({}, {"course_id": 1, "title": 1}))
391
+ # course_options = [
392
+ # f"{course['title']} ({course['course_id']})" for course in courses
393
+ # ]
394
+ # selected_courses = st.multiselect("Available Courses", course_options)
395
+
396
+ # submit = st.form_submit_button("Register")
397
+
398
+ # if submit:
399
+ # if password == confirm_password:
400
+ # hashed_password = generate_password_hash(password)
401
+ # if user_type == "student":
402
+ # new_student_id = get_new_student_id()
403
+ # enrolled_courses = [
404
+ # {
405
+ # "course_id": course.split("(")[-1][:-1],
406
+ # "title": course.split(" (")[0],
407
+ # }
408
+ # for course in selected_courses
409
+ # ]
410
+ # students_collection.insert_one(
411
+ # {
412
+ # "SID": new_student_id,
413
+ # "full_name": full_name,
414
+ # "password": hashed_password,
415
+ # "enrolled_courses": enrolled_courses,
416
+ # "created_at": datetime.utcnow(),
417
+ # }
418
+ # )
419
+ # st.success(
420
+ # f"Student registered successfully with ID: {new_student_id}"
421
+ # )
422
+ # elif user_type == "faculty":
423
+ # new_faculty_id = get_new_faculty_id()
424
+ # faculty_collection.insert_one(
425
+ # {
426
+ # "TID": new_faculty_id,
427
+ # "full_name": full_name,
428
+ # "password": hashed_password,
429
+ # "courses_taught": [],
430
+ # "created_at": datetime.utcnow(),
431
+ # }
432
+ # )
433
+ # st.success(
434
+ # f"Faculty registered successfully with ID: {new_faculty_id}"
435
+ # )
436
+ # elif user_type == "research_assistant":
437
+ # research_assistants_collection.insert_one(
438
+ # {
439
+ # "full_name": full_name,
440
+ # "password": hashed_password,
441
+ # "created_at": datetime.utcnow(),
442
+ # }
443
+ # )
444
+ # st.success("Research Assistant registered successfully!")
445
+ # else:
446
+ # st.error("Passwords do not match")
447
+
448
+
449
+ def get_new_analyst_id():
450
+ """Generate a new analyst ID by incrementing the last analyst ID"""
451
+ last_analyst = analysts_collection.find_one(sort=[("AID", -1)])
452
+ if last_analyst:
453
+ last_id = int(last_analyst["AID"][1:])
454
+ new_id = f"A{last_id + 1}"
455
+ else:
456
+ new_id = "A1"
457
+ return new_id
458
+
459
+
460
+ # def register_page():
461
+ # st.title("Register")
462
+ # if "user_type" not in st.session_state:
463
+ # st.session_state.user_type = "student"
464
+
465
+ # # Select user type
466
+ # st.session_state.user_type = st.selectbox(
467
+ # "Please select your Role", ["student", "faculty", "research_assistant", "analyst"]
468
+ # )
469
+ # user_type = st.session_state.user_type
470
+ # print(user_type)
471
+
472
+ # with st.form("register_form"):
473
+
474
+ # full_name = st.text_input("Full Name")
475
+ # password = st.text_input("Password", type="password")
476
+ # confirm_password = st.text_input("Confirm Password", type="password")
477
+
478
+ # if user_type == "student":
479
+ # # Fetch courses for students to select from
480
+ # courses = list(courses_collection.find({}, {"course_id": 1, "title": 1}))
481
+ # course_options = [
482
+ # f"{course['title']} ({course['course_id']})" for course in courses
483
+ # ]
484
+ # selected_courses = st.multiselect("Available Courses", course_options)
485
+
486
+ # submit = st.form_submit_button("Register")
487
+
488
+ # if submit:
489
+ # if password == confirm_password:
490
+ # hashed_password = generate_password_hash(password)
491
+ # if user_type == "student":
492
+ # new_student_id = get_new_student_id()
493
+ # enrolled_courses = [
494
+ # {
495
+ # "course_id": course.split("(")[-1][:-1],
496
+ # "title": course.split(" (")[0],
497
+ # }
498
+ # for course in selected_courses
499
+ # ]
500
+ # students_collection.insert_one(
501
+ # {
502
+ # "SID": new_student_id,
503
+ # "full_name": full_name,
504
+ # "password": hashed_password,
505
+ # "enrolled_courses": enrolled_courses,
506
+ # "created_at": datetime.utcnow(),
507
+ # }
508
+ # )
509
+ # st.success(
510
+ # f"Student registered successfully with ID: {new_student_id}"
511
+ # )
512
+ # elif user_type == "faculty":
513
+ # new_faculty_id = get_new_faculty_id()
514
+ # faculty_collection.insert_one(
515
+ # {
516
+ # "TID": new_faculty_id,
517
+ # "full_name": full_name,
518
+ # "password": hashed_password,
519
+ # "courses_taught": [],
520
+ # "created_at": datetime.utcnow(),
521
+ # }
522
+ # )
523
+ # st.success(
524
+ # f"Faculty registered successfully with ID: {new_faculty_id}"
525
+ # )
526
+ # elif user_type == "research_assistant":
527
+ # research_assistants_collection.insert_one(
528
+ # {
529
+ # "full_name": full_name,
530
+ # "password": hashed_password,
531
+ # "created_at": datetime.utcnow(),
532
+ # }
533
+ # )
534
+ # st.success("Research Assistant registered successfully!")
535
+ # elif user_type == "analyst":
536
+ # # new_analyst_id = get_new_analyst_id()
537
+ # analysts_collection.insert_one(
538
+ # {
539
+ # # "AID": new_analyst_id,
540
+ # "full_name": full_name,
541
+ # "password": hashed_password,
542
+ # "created_at": datetime.utcnow(),
543
+ # }
544
+ # )
545
+ # st.success("Analyst registered successfully!")
546
+ # else:
547
+ # st.error("Passwords do not match")
548
+ def register_page():
549
+ st.title("Register for NOVAScholar")
550
+ if "user_type" not in st.session_state:
551
+ st.session_state.user_type = "student"
552
+
553
+ # Select user type
554
+ st.session_state.user_type = st.selectbox(
555
+ "Please select your Role",
556
+ ["student", "faculty", "research_assistant", "analyst"]
557
+ )
558
+ user_type = st.session_state.user_type
559
+
560
+ with st.form("register_form"):
561
+ col1, col2 = st.columns(2)
562
+
563
+ with col1:
564
+ full_name = st.text_input("Full Name")
565
+ email = st.text_input("Institutional Email")
566
+ phone = st.text_input("Phone Number")
567
+
568
+ with col2:
569
+ password = st.text_input("Password", type="password")
570
+ confirm_password = st.text_input("Confirm Password", type="password")
571
+
572
+ if user_type == "student":
573
+ courses = list(courses_collection.find({}, {"course_id": 1, "title": 1}))
574
+ course_options = [f"{course['title']} ({course['course_id']})" for course in courses]
575
+ selected_courses = st.multiselect("Available Courses", course_options)
576
+
577
+ submit = st.form_submit_button("Register")
578
+
579
+ if submit:
580
+ # Validate email
581
+ email_valid, email_msg = validate_email(email)
582
+ if not email_valid:
583
+ st.error(email_msg)
584
+ return
585
+
586
+ # Validate phone
587
+ phone_valid, phone_msg = validate_phone(phone)
588
+ if not phone_valid:
589
+ st.error(phone_msg)
590
+ return
591
+
592
+ # Validate password match
593
+ if password != confirm_password:
594
+ st.error("Passwords do not match")
595
+ return
596
+
597
+ # Extract username from email
598
+ username = extract_username(email)
599
+
600
+ # Check if username already exists
601
+ if user_type == "student":
602
+ existing_user = students_collection.find_one({"username": username})
603
+ elif user_type == "faculty":
604
+ existing_user = faculty_collection.find_one({"username": username})
605
+ elif user_type == "research_assistant":
606
+ existing_user = research_assistants_collection.find_one({"username": username})
607
+ elif user_type == "analyst":
608
+ existing_user = analysts_collection.find_one({"username": username})
609
+
610
+ if existing_user:
611
+ st.error("A user with this email already exists")
612
+ return
613
+
614
+ # Hash password and create user
615
+ hashed_password = generate_password_hash(password)
616
+
617
+ user_data = {
618
+ "username": username,
619
+ "full_name": full_name,
620
+ "email": email,
621
+ "phone": phone,
622
+ "password": hashed_password,
623
+ "created_at": datetime.utcnow()
624
+ }
625
+
626
+ if user_type == "student":
627
+ new_student_id = get_new_student_id()
628
+ enrolled_courses = [
629
+ {
630
+ "course_id": course.split("(")[-1][:-1],
631
+ "title": course.split(" (")[0],
632
+ }
633
+ for course in selected_courses
634
+ ]
635
+ user_data["SID"] = new_student_id
636
+ user_data["enrolled_courses"] = enrolled_courses
637
+ students_collection.insert_one(user_data)
638
+ st.success(f"Student registered successfully! Your username is: {username}")
639
+
640
+ elif user_type == "faculty":
641
+ new_faculty_id = get_new_faculty_id()
642
+ user_data["TID"] = new_faculty_id
643
+ user_data["courses_taught"] = []
644
+ faculty_collection.insert_one(user_data)
645
+ st.success(f"Faculty registered successfully! Your username is: {username}")
646
+
647
+ elif user_type == "research_assistant":
648
+ research_assistants_collection.insert_one(user_data)
649
+ st.success(f"Research Assistant registered successfully! Your username is: {username}")
650
+
651
+ elif user_type == "analyst":
652
+ analysts_collection.insert_one(user_data)
653
+ st.success(f"Analyst registered successfully! Your username is: {username}")
654
+
655
+ # Create Course feature
656
+ # def create_course_form2(faculty_name, faculty_id):
657
+ # """Display enhanced form to create a new course with AI-generated content"""
658
+ # st.title("Create New Course")
659
+
660
+ # if 'course_plan' not in st.session_state:
661
+ # st.session_state.course_plan = None
662
+ # if 'edit_mode' not in st.session_state:
663
+ # st.session_state.edit_mode = False
664
+
665
+ # # Initial Course Creation Form
666
+ # if not st.session_state.course_plan:
667
+ # with st.form("initial_course_form"):
668
+ # col1, col2 = st.columns(2)
669
+ # with col1:
670
+ # course_name = st.text_input("Course Name", placeholder="e.g., Introduction to Computer Science")
671
+ # faculty_info = st.text_input("Faculty", value=faculty_name, disabled=True)
672
+ # with col2:
673
+ # duration_weeks = st.number_input("Duration (weeks)", min_value=1, max_value=16, value=12)
674
+ # start_date = st.date_input("Start Date")
675
+
676
+ # generate_button = st.form_submit_button("Generate Course Structure", use_container_width=True)
677
+
678
+ # if generate_button and course_name:
679
+ # with st.spinner("Generating course structure..."):
680
+ # try:
681
+ # course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name)
682
+ # # print(course_plan)
683
+ # st.session_state.course_plan = json.loads(course_plan)
684
+ # st.session_state.start_date = start_date
685
+ # st.session_state.duration_weeks = duration_weeks
686
+ # st.rerun()
687
+ # except Exception as e:
688
+ # st.error(f"Error generating course structure: {e}")
689
+
690
+ # # Display and Edit Generated Course Content
691
+ # if st.session_state.course_plan:
692
+ # with st.expander("Course Overview", expanded=True):
693
+ # if not st.session_state.edit_mode:
694
+ # st.subheader(st.session_state.course_plan['course_title'])
695
+ # st.write(st.session_state.course_plan['course_description'])
696
+ # edit_button = st.button("Edit Course Details", use_container_width=True)
697
+ # if edit_button:
698
+ # st.session_state.edit_mode = True
699
+ # st.rerun()
700
+ # else:
701
+ # with st.form("edit_course_details"):
702
+ # st.session_state.course_plan['course_title'] = st.text_input(
703
+ # "Course Title",
704
+ # value=st.session_state.course_plan['course_title']
705
+ # )
706
+ # st.session_state.course_plan['course_description'] = st.text_area(
707
+ # "Course Description",
708
+ # value=st.session_state.course_plan['course_description']
709
+ # )
710
+ # if st.form_submit_button("Save Course Details"):
711
+ # st.session_state.edit_mode = False
712
+ # st.rerun()
713
+
714
+ # # Display Modules and Sessions
715
+ # st.subheader("Course Modules and Sessions")
716
+
717
+ # start_date = st.session_state.start_date
718
+ # current_date = start_date
719
+
720
+ # all_sessions = []
721
+ # for module_idx, module in enumerate(st.session_state.course_plan['modules']):
722
+ # with st.expander(f"📚 Module {module_idx + 1}: {module['module_title']}", expanded=True):
723
+ # # Edit module title
724
+ # new_module_title = st.text_input(
725
+ # f"Module {module_idx + 1} Title",
726
+ # value=module['module_title'],
727
+ # key=f"module_{module_idx}"
728
+ # )
729
+ # module['module_title'] = new_module_title
730
+
731
+ # for sub_idx, sub_module in enumerate(module['sub_modules']):
732
+ # st.markdown(f"### 📖 {sub_module['title']}")
733
+
734
+ # # Create sessions for each topic
735
+ # for topic_idx, topic in enumerate(sub_module['topics']):
736
+ # session_key = f"session_{module_idx}_{sub_idx}_{topic_idx}"
737
+
738
+ # with st.container():
739
+ # col1, col2, col3 = st.columns([3, 2, 1])
740
+ # with col1:
741
+ # new_topic = st.text_input(
742
+ # "Topic",
743
+ # value=topic,
744
+ # key=f"{session_key}_topic"
745
+ # )
746
+ # sub_module['topics'][topic_idx] = new_topic
747
+
748
+ # with col2:
749
+ # session_date = st.date_input(
750
+ # "Session Date",
751
+ # value=current_date,
752
+ # key=f"{session_key}_date"
753
+ # )
754
+
755
+ # with col3:
756
+ # session_status = st.selectbox(
757
+ # "Status",
758
+ # options=["upcoming", "in-progress", "completed"],
759
+ # key=f"{session_key}_status"
760
+ # )
761
+
762
+ # # Create session object
763
+ # session = {
764
+ # "session_id": str(ObjectId()),
765
+ # "title": new_topic,
766
+ # "date": datetime.combine(session_date, datetime.min.time()),
767
+ # "status": session_status,
768
+ # "module_name": module['module_title'],
769
+ # "created_at": datetime.utcnow(),
770
+ # "pre_class": {
771
+ # "resources": [],
772
+ # "completion_required": True
773
+ # },
774
+ # "in_class": {
775
+ # "quiz": [],
776
+ # "polls": []
777
+ # },
778
+ # "post_class": {
779
+ # "assignments": []
780
+ # }
781
+ # }
782
+ # all_sessions.append(session)
783
+ # current_date = session_date + timedelta(days=7)
784
+
785
+ # new_course_id = get_new_course_id()
786
+ # course_title = st.session_state.course_plan['course_title']
787
+ # # Final Save Button
788
+ # if st.button("Save Course", type="primary", use_container_width=True):
789
+ # try:
790
+ # course_doc = {
791
+ # "course_id": new_course_id,
792
+ # "title": course_title,
793
+ # "description": st.session_state.course_plan['course_description'],
794
+ # "faculty": faculty_name,
795
+ # "faculty_id": faculty_id,
796
+ # "duration": f"{st.session_state.duration_weeks} weeks",
797
+ # "start_date": datetime.combine(st.session_state.start_date, datetime.min.time()),
798
+ # "created_at": datetime.utcnow(),
799
+ # "sessions": all_sessions
800
+ # }
801
+
802
+ # # Insert into database
803
+ # courses_collection.insert_one(course_doc)
804
+
805
+ # st.success("Course successfully created!")
806
+
807
+ # # Update faculty collection
808
+ # faculty_collection.update_one(
809
+ # {"_id": st.session_state.user_id},
810
+ # {
811
+ # "$push": {
812
+ # "courses_taught": {
813
+ # "course_id": new_course_id,
814
+ # "title": course_title,
815
+ # }
816
+ # }
817
+ # },
818
+ # )
819
+
820
+ # # Clear session state
821
+ # st.session_state.course_plan = None
822
+ # st.session_state.edit_mode = False
823
+
824
+ # # Optional: Add a button to view the created course
825
+ # if st.button("View Course"):
826
+ # # Add navigation logic here
827
+ # pass
828
+
829
+ # except Exception as e:
830
+ # st.error(f"Error saving course: {e}")
831
+
832
+
833
+ def remove_json_backticks(json_string):
834
+ """Remove backticks and 'json' from the JSON object string"""
835
+ return json_string.replace("```json", "").replace("```", "").strip()
836
+
837
+
838
+ def create_course_form(faculty_name, faculty_id):
839
+ """Display enhanced form to create a new course with AI-generated content and resources"""
840
+
841
+ st.title("Create New Course")
842
+
843
+ if 'course_plan' not in st.session_state:
844
+ st.session_state.course_plan = None
845
+ if 'edit_mode' not in st.session_state:
846
+ st.session_state.edit_mode = False
847
+ if 'resources_map' not in st.session_state:
848
+ st.session_state.resources_map = {}
849
+ if 'start_date' not in st.session_state:
850
+ st.session_state.start_date = None
851
+ if 'duration_weeks' not in st.session_state:
852
+ st.session_state.duration_weeks = None
853
+ if 'sessions_per_week' not in st.session_state:
854
+ st.session_state.sessions_per_week = None
855
+
856
+
857
+ # Initial Course Creation Form
858
+ if not st.session_state.course_plan:
859
+ with st.form("initial_course_form"):
860
+ col1, col2 = st.columns(2)
861
+ with col1:
862
+ course_name = st.text_input("Course Name", placeholder="e.g., Introduction to Computer Science")
863
+ faculty_info = st.text_input("Faculty", value=faculty_name, disabled=True)
864
+ sessions_per_week = st.number_input("Sessions Per Week", min_value=1, max_value=5, value=2)
865
+ with col2:
866
+ duration_weeks = st.number_input("Duration (weeks)", min_value=1, max_value=16, value=12)
867
+ start_date = st.date_input("Start Date")
868
+
869
+ generate_button = st.form_submit_button("Generate Course Structure", use_container_width=True)
870
+
871
+ if generate_button and course_name:
872
+ with st.spinner("Generating course structure and resources..."):
873
+ try:
874
+ # Generate course plan with resources
875
+ course_plan = generate_perplexity_response(
876
+ PERPLEXITY_API_KEY,
877
+ course_name,
878
+ duration_weeks,
879
+ sessions_per_week
880
+ )
881
+ try:
882
+ course_plan_json = json.loads(course_plan)
883
+ validate_course_plan(course_plan_json)
884
+ st.session_state.course_plan = course_plan_json
885
+ except (json.JSONDecodeError, ValueError) as e:
886
+ st.error(f"Error in course plan structure: {e}")
887
+ return
888
+ st.session_state.start_date = start_date
889
+ st.session_state.duration_weeks = duration_weeks
890
+ st.session_state.sessions_per_week = sessions_per_week
891
+
892
+ # Generate resources for all sessions
893
+ session_titles = []
894
+ for module in st.session_state.course_plan['modules']:
895
+ for sub_module in module['sub_modules']:
896
+ for topic in sub_module['topics']:
897
+ # session_titles.append(topic['title'])
898
+ # session_titles.append(topic)
899
+ if isinstance(topic, dict):
900
+ session_titles.append(topic['title'])
901
+ else:
902
+ session_titles.append(topic)
903
+ # In generate_session_resources function, add validation:
904
+ if not session_titles:
905
+ return json.dumps({"session_resources": []})
906
+ resources_response = generate_session_resources(PERPLEXITY_API_KEY, session_titles)
907
+ without_backticks = remove_json_backticks(resources_response)
908
+ resources = json.loads(without_backticks)
909
+ st.session_state.resources_map = {
910
+ resource['session_title']: resource['resources']
911
+ for resource in resources['session_resources']
912
+ }
913
+ # Add error handling for the resources map
914
+ # if st.session_state.resources_map is None:
915
+ # st.session_state.resources_map = {}
916
+
917
+ st.rerun()
918
+ except Exception as e:
919
+ st.error(f"Error generating course structure: {e}")
920
+
921
+ # Display and Edit Generated Course Content
922
+ if st.session_state.course_plan:
923
+ with st.expander("Course Overview", expanded=True):
924
+ if not st.session_state.edit_mode:
925
+ st.subheader(st.session_state.course_plan['course_title'])
926
+ st.write(st.session_state.course_plan['course_description'])
927
+ col1, col2, col3 = st.columns(3)
928
+ with col1:
929
+ st.write(f"**Start Date:** {st.session_state.start_date}")
930
+ with col2:
931
+ st.write(f"**Duration (weeks):** {st.session_state.duration_weeks}")
932
+ with col3:
933
+ st.write(f"**Sessions Per Week:** {st.session_state.sessions_per_week}")
934
+
935
+ edit_button = st.button("Edit Course Details", use_container_width=True)
936
+ if edit_button:
937
+ st.session_state.edit_mode = True
938
+ st.rerun()
939
+ else:
940
+ with st.form("edit_course_details"):
941
+ st.session_state.course_plan['course_title'] = st.text_input(
942
+ "Course Title",
943
+ value=st.session_state.course_plan['course_title']
944
+ )
945
+ st.session_state.course_plan['course_description'] = st.text_area(
946
+ "Course Description",
947
+ value=st.session_state.course_plan['course_description']
948
+ )
949
+ if st.form_submit_button("Save Course Details"):
950
+ st.session_state.edit_mode = False
951
+ st.rerun()
952
+
953
+ # Display Modules and Sessions
954
+ st.subheader("Course Modules and Sessions")
955
+
956
+ start_date = st.session_state.start_date
957
+ current_date = start_date
958
+
959
+ all_sessions = []
960
+ for module_idx, module in enumerate(st.session_state.course_plan['modules']):
961
+ with st.expander(f"📚 Module {module_idx + 1}: {module['module_title']}", expanded=True):
962
+ # Edit module title
963
+ new_module_title = st.text_input(
964
+ f"Edit Module Title",
965
+ value=module['module_title'],
966
+ key=f"module_{module_idx}"
967
+ )
968
+ module['module_title'] = new_module_title
969
+
970
+ for sub_idx, sub_module in enumerate(module['sub_modules']):
971
+ st.markdown("<br>", unsafe_allow_html=True) # Add gap between sessions
972
+ # st.markdown(f"### 📖 {sub_module['title']}")
973
+ st.markdown(f'<h3 style="font-size: 1.25rem;">📖 Chapter {sub_idx + 1}: {sub_module["title"]}</h3>', unsafe_allow_html=True)
974
+ # Possible fix:
975
+ # Inside the loop where topics are being processed:
976
+
977
+ for topic_idx, topic in enumerate(sub_module['topics']):
978
+ st.markdown("<br>", unsafe_allow_html=True) # Add gap between sessions
979
+ session_key = f"session_{module_idx}_{sub_idx}_{topic_idx}"
980
+
981
+ # Get topic title based on type
982
+ if isinstance(topic, dict):
983
+ current_topic_title = topic.get('title', '')
984
+ current_topic_display = current_topic_title
985
+ else:
986
+ current_topic_title = str(topic)
987
+ current_topic_display = current_topic_title
988
+
989
+ with st.container():
990
+ # Session Details
991
+ col1, col2, col3 = st.columns([3, 2, 1])
992
+ with col1:
993
+ new_topic = st.text_input(
994
+ f"Session {topic_idx + 1} Title",
995
+ value=current_topic_display,
996
+ key=f"{session_key}_topic"
997
+ )
998
+ # Update the topic in the data structure
999
+ if isinstance(topic, dict):
1000
+ topic['title'] = new_topic
1001
+ else:
1002
+ sub_module['topics'][topic_idx] = new_topic
1003
+
1004
+ with col2:
1005
+ session_date = st.date_input(
1006
+ "Session Date",
1007
+ value=current_date,
1008
+ key=f"{session_key}_date"
1009
+ )
1010
+
1011
+ with col3:
1012
+ session_status = st.selectbox(
1013
+ "Status",
1014
+ options=["upcoming", "in-progress", "completed"],
1015
+ key=f"{session_key}_status"
1016
+ )
1017
+
1018
+ # Display Resources
1019
+ if st.session_state.resources_map:
1020
+ # Try both the full topic title and the display title
1021
+ resources = None
1022
+ if isinstance(topic, dict) and topic.get('title') in st.session_state.resources_map:
1023
+ resources = st.session_state.resources_map[topic['title']]
1024
+ elif current_topic_title in st.session_state.resources_map:
1025
+ resources = st.session_state.resources_map[current_topic_title]
1026
+
1027
+ if resources:
1028
+ with st.container():
1029
+ # st.markdown("#### 📚 Session Resources")
1030
+ st.markdown(f'<h4 style="font-size: 1.25rem;">📚 Session Resources</h4>', unsafe_allow_html=True)
1031
+ # Readings Tab
1032
+ if resources.get('readings'):
1033
+ st.markdown(f'<h5 style="font-size: 1.1rem; margin-top: 1rem;">📖 External Resources</h5>', unsafe_allow_html=True)
1034
+ col1, col2 = st.columns(2)
1035
+ for idx, reading in enumerate(resources['readings']):
1036
+ with col1 if idx % 2 == 0 else col2:
1037
+ st.markdown(f"""
1038
+ - **{reading['title']}**
1039
+ - Type: {reading['type']}
1040
+ - Estimated reading time: {reading['estimated_read_time']}
1041
+ - [Access Resource]({reading['url']})
1042
+ """)
1043
+
1044
+ # Books Tab and Additional Resources Tab side-by-side
1045
+ col1, col2 = st.columns(2)
1046
+
1047
+ with col1:
1048
+ if resources.get('books'):
1049
+ st.markdown(f'<h5 style="font-size: 1.1rem; margin-top: 1rem;">📚 Reference Books</h5>', unsafe_allow_html=True)
1050
+ for book in resources['books']:
1051
+ with st.container():
1052
+ st.markdown(f"""
1053
+ - **{book['title']}**
1054
+ - Author: {book['author']}
1055
+ - ISBN: {book['isbn']}
1056
+ - Chapters: {book['chapters']}
1057
+ """)
1058
+
1059
+ with col2:
1060
+ if resources.get('additional_resources'):
1061
+ st.markdown(f'<h5 style="font-size: 1.1rem; margin-top: 1rem;">🔗 Additional Study Resources</h5>', unsafe_allow_html=True)
1062
+ for resource in resources['additional_resources']:
1063
+ with st.container():
1064
+ st.markdown(f"""
1065
+ - **{resource['title']}**
1066
+ - Type: {resource['type']}
1067
+ - Description: {resource['description']}
1068
+ - [Access Resource]({resource['url']})
1069
+ """)
1070
+
1071
+ # Create session object
1072
+ session = {
1073
+ "session_id": str(ObjectId()),
1074
+ "title": new_topic,
1075
+ "date": datetime.combine(session_date, datetime.min.time()),
1076
+ "status": session_status,
1077
+ "module_name": module['module_title'],
1078
+ "created_at": datetime.utcnow(),
1079
+ "pre_class": {
1080
+ "resources": [],
1081
+ "completion_required": True
1082
+ },
1083
+ "in_class": {
1084
+ "quiz": [],
1085
+ "polls": []
1086
+ },
1087
+ "post_class": {
1088
+ "assignments": []
1089
+ },
1090
+ "external_resources": st.session_state.resources_map.get(current_topic_title, {})
1091
+ }
1092
+ all_sessions.append(session)
1093
+ current_date = session_date + timedelta(days=7)
1094
+
1095
+
1096
+ new_course_id = get_new_course_id()
1097
+ course_title = st.session_state.course_plan['course_title']
1098
+
1099
+ # Final Save Button
1100
+ if st.button("Save Course", type="primary", use_container_width=True):
1101
+ try:
1102
+ course_doc = {
1103
+ "course_id": new_course_id,
1104
+ "title": course_title,
1105
+ "description": st.session_state.course_plan['course_description'],
1106
+ "faculty": faculty_name,
1107
+ "faculty_id": faculty_id,
1108
+ "duration": f"{st.session_state.duration_weeks} weeks",
1109
+ "sessions_per_week": st.session_state.sessions_per_week,
1110
+ "start_date": datetime.combine(st.session_state.start_date, datetime.min.time()),
1111
+ "created_at": datetime.utcnow(),
1112
+ "sessions": all_sessions
1113
+ }
1114
+
1115
+ # Insert into database
1116
+ courses_collection.insert_one(course_doc)
1117
+ st.success("Course successfully created!")
1118
+
1119
+ # Update faculty collection
1120
+ faculty_collection.update_one(
1121
+ {"_id": st.session_state.user_id},
1122
+ {
1123
+ "$push": {
1124
+ "courses_taught": {
1125
+ "course_id": new_course_id,
1126
+ "title": course_title,
1127
+ }
1128
+ }
1129
+ }
1130
+ )
1131
+
1132
+ # Clear session state
1133
+ st.session_state.course_plan = None
1134
+ st.session_state.edit_mode = False
1135
+ st.session_state.resources_map = {}
1136
+
1137
+ # Optional: Add a button to view the created course
1138
+ if st.button("View Course"):
1139
+ # Add navigation logic here
1140
+ pass
1141
+
1142
+ except Exception as e:
1143
+ st.error(f"Error saving course: {e}")
1144
+
1145
+
1146
+
1147
+ from research_assistant_dashboard import display_research_assistant_dashboard
1148
+ from goals2 import display_analyst_dashboard
1149
+ def enroll_in_course(course_id, course_title, student):
1150
+ """Enroll a student in a course"""
1151
+ if student:
1152
+ courses = student.get("enrolled_courses", [])
1153
+ if course_id not in [course["course_id"] for course in courses]:
1154
+ course = courses_collection.find_one({"course_id": course_id})
1155
+ if course:
1156
+ courses.append(
1157
+ {
1158
+ "course_id": course["course_id"],
1159
+ "title": course["title"],
1160
+ }
1161
+ )
1162
+ students_collection.update_one(
1163
+ {"_id": st.session_state.user_id},
1164
+ {"$set": {"enrolled_courses": courses}},
1165
+ )
1166
+ st.success(f"Enrolled in course {course_title}")
1167
+ # st.experimental_rerun()
1168
+ else:
1169
+ st.error("Course not found")
1170
+ else:
1171
+ st.warning("Already enrolled in this course")
1172
+
1173
+ # def enroll_in_course_page(course_id):
1174
+ # """Enroll a student in a course"""
1175
+ # student = students_collection.find_one({"_id": st.session_state.user_id})
1176
+ # course_title = courses_collection.find_one({"course_id": course_id})["title"]
1177
+
1178
+ # course = courses_collection.find_one({"course_id": course_id})
1179
+ # if course:
1180
+ # st.title(course["title"])
1181
+ # st.subheader("Course Description:")
1182
+ # st.write(course["description"])
1183
+ # st.write(f"Faculty: {course['faculty']}")
1184
+ # st.write(f"Duration: {course['duration']}")
1185
+
1186
+ # st.title("Course Sessions")
1187
+ # for session in course["sessions"]:
1188
+ # st.write(f"Session: {session['title']}")
1189
+ # st.write(f"Date: {session['date']}")
1190
+ # st.write(f"Status: {session['status']}")
1191
+ # st.write("----")
1192
+ # else:
1193
+ # st.error("Course not found")
1194
+
1195
+ # enroll_button = st.button("Enroll in Course", key="enroll_button", use_container_width=True)
1196
+ # if enroll_button:
1197
+ # enroll_in_course(course_id, course_title, student)
1198
+ def enroll_in_course_page(course_id):
1199
+ """Display an aesthetically pleasing course enrollment page"""
1200
+ student = students_collection.find_one({"_id": st.session_state.user_id})
1201
+ course = courses_collection.find_one({"course_id": course_id})
1202
+
1203
+ if not course:
1204
+ st.error("Course not found")
1205
+ return
1206
+
1207
+ # Create two columns for layout
1208
+ col1, col2 = st.columns([2, 1])
1209
+
1210
+ with col1:
1211
+ # Course header section
1212
+ st.title(course["title"])
1213
+ st.markdown(f"*{course['description']}*")
1214
+
1215
+ # Course details in an expander
1216
+ with st.expander("Course Details", expanded=True):
1217
+ st.markdown(f"👨‍🏫 **Faculty:** {course['faculty']}")
1218
+ st.markdown(f"⏱️ **Duration:** {course['duration']}")
1219
+
1220
+ # Sessions in a clean card-like format
1221
+ st.subheader("📚 Course Sessions")
1222
+ for idx, session in enumerate(course["sessions"], 1):
1223
+ with st.container():
1224
+ st.markdown(f"""
1225
+ ---
1226
+ ### Session {idx}: {session['title']}
1227
+ 🗓️ **Date:** {session['date']}
1228
+ 📌 **Status:** {session['status']}
1229
+ """)
1230
+
1231
+ with col2:
1232
+ with st.container():
1233
+ st.markdown("### Ready to Learn?")
1234
+ st.markdown("Click below to enroll in this course")
1235
+
1236
+ # Check if already enrolled
1237
+ courses = student.get("enrolled_courses", [])
1238
+ is_enrolled = course_id in [c["course_id"] for c in courses]
1239
+
1240
+ if is_enrolled:
1241
+ st.info("✅ You are already enrolled in this course")
1242
+ else:
1243
+ enroll_button = st.button(
1244
+ "🎓 Enroll Now",
1245
+ key="enroll_button",
1246
+ use_container_width=True
1247
+ )
1248
+ if enroll_button:
1249
+ enroll_in_course(course_id, course["title"], student)
1250
+
1251
+ def show_available_courses(username, user_type, user_id):
1252
+ """Display available courses for enrollment"""
1253
+ st.title("Available Courses")
1254
+
1255
+ courses = list(courses_collection.find({}, {"course_id": 1, "title": 1}))
1256
+ course_options = [
1257
+ f"{course['title']} ({course['course_id']})" for course in courses
1258
+ ]
1259
+
1260
+ selected_course = st.selectbox("Select a Course to Enroll", course_options)
1261
+ # if selected_courses:
1262
+ # for course in selected_courses:
1263
+ # course_id = course.split("(")[-1][:-1]
1264
+ # course_title = course.split(" (")[0]
1265
+ # enroll_in_course(course_id, course_title, user_id)
1266
+ # st.success("Courses enrolled successfully!")
1267
+ if selected_course:
1268
+ course_id = selected_course.split("(")[-1][:-1]
1269
+ enroll_in_course_page(course_id)
1270
+
1271
+ def validate_email(email):
1272
+ """Validate email format and domain"""
1273
+ # Basic email pattern
1274
+ pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
1275
+ if not re.match(pattern, email):
1276
+ return False, "Invalid email format"
1277
+
1278
+ # You can add additional institution-specific validation here
1279
+ # For example, checking if the domain is from your institution
1280
+ # allowed_domains = ["spit.ac.in"] # Add more domains as needed
1281
+ # domain = email.split('@')[1]
1282
+ # if domain not in allowed_domains:
1283
+ # return False, "Please use your institutional email address"
1284
+
1285
+ return True, "Valid email"
1286
+
1287
+ def validate_phone(phone):
1288
+ """Validate phone number format"""
1289
+ # Assuming Indian phone numbers
1290
+ pattern = r'^[6-9]\d{9}$'
1291
+ if not re.match(pattern, phone):
1292
+ return False, "Invalid phone number format. Please enter a 10-digit Indian mobile number"
1293
+ return True, "Valid phone number"
1294
+
1295
+ def extract_username(email):
1296
+ """Extract username from email"""
1297
+ return email.split('@')[0]
1298
+
1299
+
1300
+
1301
+
1302
+ def main_dashboard():
1303
+ if st.session_state.user_type == "research_assistant":
1304
+ display_research_assistant_dashboard()
1305
+ elif st.session_state.user_type == "analyst":
1306
+ display_analyst_dashboard()
1307
+ else:
1308
+ selected_course_id = None
1309
+ create_session = False
1310
+ with st.sidebar:
1311
+ st.title(f"Welcome, {st.session_state.username}")
1312
+ if st.session_state.user_type == "student":
1313
+ st.title("Enrolled Courses")
1314
+ else:
1315
+ st.title("Your Courses")
1316
+
1317
+ # Course selection
1318
+ enrolled_courses = get_courses(
1319
+ st.session_state.username, st.session_state.user_type
1320
+ )
1321
+
1322
+ # Enroll in Courses
1323
+ if st.session_state.user_type == "student":
1324
+ if st.button(
1325
+ "Enroll in a New Course", key="enroll_course", use_container_width=True
1326
+ ):
1327
+ st.session_state.show_enroll_course_page = True
1328
+
1329
+ # if st.session_state.show_enroll_course_form:
1330
+ # courses = list(courses_collection.find({}, {"course_id": 1, "title": 1}))
1331
+ # courses += list(courses_collection2.find({}, {"course_id": 1, "title": 1}))
1332
+ # course_options = [f"{course['title']} ({course['course_id']})" for course in courses]
1333
+ # course_to_enroll = st.selectbox("Available Courses", course_options)
1334
+ # st.session_state.course_to_enroll = course_to_enroll
1335
+
1336
+ if st.session_state.user_type == "faculty":
1337
+ if st.button(
1338
+ "Create New Course", key="create_course", use_container_width=True
1339
+ ):
1340
+ st.session_state.show_create_course_form = True
1341
+
1342
+ if not enrolled_courses:
1343
+ st.warning("No courses found")
1344
+ else:
1345
+ course_titles = [course["title"] for course in enrolled_courses]
1346
+ course_ids = [course["course_id"] for course in enrolled_courses]
1347
+
1348
+ selected_course = st.selectbox("Select Course", course_titles)
1349
+ selected_course_id = course_ids[course_titles.index(selected_course)]
1350
+ print("Selected Course ID: ", selected_course_id)
1351
+
1352
+ st.session_state.selected_course = selected_course
1353
+ st.session_state.selected_course_id = selected_course_id
1354
+
1355
+ # Display course sessions
1356
+ sessions = get_sessions(selected_course_id, selected_course)
1357
+
1358
+ st.title("Course Sessions")
1359
+ for i, session in enumerate(sessions, start=1):
1360
+ if st.button(
1361
+ f"Session {i}", key=f"session_{i}", use_container_width=True
1362
+ ):
1363
+ st.session_state.selected_session = session
1364
+
1365
+ if st.session_state.user_type == "faculty":
1366
+ # Create new session
1367
+ # create_session = st.button("Create New Session Button", key="create_session", use_container_width=True)
1368
+ if st.button(
1369
+ "Create New Session",
1370
+ key="create_session",
1371
+ use_container_width=True,
1372
+ ):
1373
+ st.session_state.show_create_session_form = True
1374
+
1375
+ if st.button("Logout", use_container_width=True):
1376
+ for key in list(st.session_state.keys()):
1377
+ del st.session_state[key]
1378
+ st.rerun()
1379
+
1380
+ # if create_session:
1381
+ # create_session_form(selected_course_id)
1382
+ if st.session_state.get("show_create_course_form"):
1383
+ create_course_form(st.session_state.username, st.session_state.user_id)
1384
+ elif st.session_state.get("show_create_session_form"):
1385
+ create_session_form(selected_course_id)
1386
+ elif st.session_state.get("show_enroll_course_page"):
1387
+ show_available_courses(st.session_state.username, st.session_state.user_type, st.session_state.user_id)
1388
+ else:
1389
+ # Main content
1390
+ if "selected_session" in st.session_state:
1391
+ display_session_content(
1392
+ st.session_state.user_id,
1393
+ selected_course_id,
1394
+ st.session_state.selected_session,
1395
+ st.session_state.username,
1396
+ st.session_state.user_type,
1397
+ )
1398
+ else:
1399
+ st.info("Select a session to view details")
1400
+ # # Main content
1401
+ # if 'selected_session' in st.session_state:
1402
+ # display_session_content(st.session_state.user_id, selected_course_id, st.session_state.selected_session, st.session_state.username, st.session_state.user_type)
1403
+ # if create_session:
1404
+ # create_session_form(selected_course_id)
1405
+
1406
+
1407
+ def main():
1408
+ st.set_page_config(page_title="NOVAScholar", page_icon="📚", layout="wide")
1409
+ init_session_state()
1410
+ # modify_courses_collection_schema()
1411
+
1412
+ if not st.session_state.authenticated:
1413
+ login_tab, register_tab = st.tabs(["Login", "Register"])
1414
+
1415
+ with register_tab:
1416
+ register_page()
1417
+ with login_tab:
1418
+ login_form()
1419
+ else:
1420
+ main_dashboard()
1421
+
1422
+
1423
+ if __name__ == "__main__":
1424
+ main()
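The registration helpers defined above (validate_email, validate_phone, extract_username) are plain functions and can be exercised outside Streamlit. A minimal sketch of the same checks, reusing the identical regex patterns; the sample address and numbers below are made up for illustration:

import re

# Same patterns as validate_email and validate_phone above.
EMAIL_PATTERN = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
PHONE_PATTERN = r'^[6-9]\d{9}$'

print(bool(re.match(EMAIL_PATTERN, "jane.doe@spit.ac.in")))  # True  (hypothetical address)
print(bool(re.match(PHONE_PATTERN, "9876543210")))           # True  (hypothetical number)
print(bool(re.match(PHONE_PATTERN, "12345")))                # False (too short, wrong leading digit)
print("jane.doe@spit.ac.in".split("@")[0])                   # "jane.doe", mirrors extract_username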
chatbot.py ADDED
@@ -0,0 +1,67 @@
1
+ import streamlit as st
2
+ import datetime
3
+ from db import courses_collection2, faculty_collection, students_collection, vectors_collection, chat_history_collection
4
+ from PIL import Image
5
+ from dotenv import load_dotenv
6
+ import os
7
+ from datetime import datetime
8
+ from bson import ObjectId
9
+ from file_upload_vectorize import model
10
+ from gen_mcqs import generate_mcqs, quizzes_collection
11
+
12
+ load_dotenv()
13
+ MONGO_URI = os.getenv('MONGO_URI')
14
+ OPENAI_KEY = os.getenv('OPENAI_KEY')
15
+ GEMINI_KEY = os.getenv('GEMINI_KEY')
16
+
17
+ def insert_chat_message(user_id, session_id, role, content):
18
+ message = {
19
+ "role": role,
20
+ "content": content,
21
+ "timestamp": datetime.utcnow()
22
+ }
23
+
24
+ chat_history_collection.update_one(
25
+ {"user_id": ObjectId(user_id), "session_id": session_id},
26
+ {"$push": {"messages": message}, "$set": {"timestamp": datetime.utcnow()}},
27
+ upsert=True
28
+ )
29
+
30
+ def give_chat_response(user_id, session_id, question, title, description, context):
31
+ context_prompt = f"""
32
+ Based on the following session title, description, and context, answer the user's question in 3-4 lines:
33
+
34
+ Title: {title}
35
+ Description: {description}
36
+ Context: {context}
37
+
38
+ Question: {question}
39
+
40
+ Please provide a clear and concise answer based on the information provided.
41
+ """
42
+
43
+ response = model.generate_content(context_prompt)
44
+ if not response or not response.text:
45
+ return "No response received from the model"
46
+
47
+ assistant_response = response.text.strip()
48
+
49
+ # Save the chat message
50
+ insert_chat_message(user_id, session_id, "assistant", assistant_response)
51
+
52
+ return assistant_response
53
+
54
+ def create_quiz_by_context(user_id, session_id, context, length, session_title, session_description):
55
+ """Create a quiz based on the context provided"""
56
+ quiz = generate_mcqs(context, length, session_title, session_description)
57
+ if not quiz:
58
+ return "No quiz generated";
59
+
60
+ # Save the quiz
61
+ quizzes_collection.insert_one({
62
+ "user_id": ObjectId(user_id),
63
+ "session_id": ObjectId(session_id),
64
+ "questions": quiz,
65
+ "timestamp": datetime.utcnow()
66
+ })
67
+ return "Quiz created successfully"
create_course.py ADDED
@@ -0,0 +1,272 @@
1
+ from datetime import datetime, timedelta
2
+ import os
3
+ from typing import Dict, List, Any
4
+ from pymongo import MongoClient
5
+ import requests
6
+ import uuid
7
+ import openai
8
+ from openai import OpenAI
9
+ import streamlit as st
10
+ from bson import ObjectId
11
+ from dotenv import load_dotenv
12
+ import json
13
+
14
+ load_dotenv()
15
+ MONGODB_URI = os.getenv("MONGO_URI")
16
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
17
+ OPENAI_API_KEY = os.getenv("OPENAI_KEY")
18
+
19
+ client = MongoClient(MONGODB_URI)
20
+ db = client['novascholar_db']
21
+ courses_collection = db['courses']
22
+
23
+ def generate_perplexity_response(api_key, course_name):
24
+ headers = {
25
+ "accept": "application/json",
26
+ "content-type": "application/json",
27
+ "authorization": f"Bearer {api_key}"
28
+ }
29
+
30
+ prompt = f"""
31
+ You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate comprehensive, academically rigorous course structures for undergraduate level education.
32
+
33
+ Please generate a detailed course structure for the course {course_name} in JSON format following these specifications:
34
+
35
+ 1. The course structure should be appropriate for a full semester (14-16 weeks)
36
+ 2. Each module should be designed for 2-4 weeks of instruction
37
+ 3. Follow standard academic practices and nomenclature
38
+ 4. Ensure progressive complexity from foundational to advanced concepts
39
+ 5. The course_title should exactly match the course name provided in the prompt. No additional information should be included in the course_title field.
40
+ 6. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
41
+ 7. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
42
+
43
+
44
+ The JSON response should follow this structure:
45
+ {{
46
+ "course_title": "string",
47
+ "course_description": "string",
48
+ "modules": [
49
+ {{
50
+ "module_title": "string",
51
+ "sub_modules": [
52
+ {{
53
+ "title": "string",
54
+ "topics": [string],
55
+ }}
56
+ ]
57
+ }}
58
+ ]
59
+ }}
60
+
61
+ Example response:
62
+ {{
63
+ "course_title": "Advanced Natural Language Processing",
64
+ "course_descriptio": "An advanced course covering modern approaches to NLP using deep learning, with focus on transformer architectures and their applications.",
65
+ "modules": [
66
+ {{
67
+ "module_title": "Foundations of Modern NLP",
68
+ "sub_modules": [
69
+ {{
70
+ "title": "Attention Mechanism",
71
+ "topics": [
72
+ "Self-attention",
73
+ "Multi-head attention",
74
+ "Positional encoding"
75
+ ]
76
+ }}
77
+ ]
78
+ }}
79
+ ]
80
+ }}
81
+ """
82
+
83
+ messages = [
84
+ {
85
+ "role": "system",
86
+ "content": (
87
+ "You are an expert educational AI assistant specializing in course design and curriculum planning. "
88
+ "Your task is to generate accurate, detailed, and structured educational content for undergraduate-level and post-graduate-level courses. "
89
+ "Provide detailed and accurate information tailored to the user's prompt."
90
+ "Ensure that the responses are logical, follow standard academic practices, and include realistic concepts relevant to the course."
91
+ ),
92
+ },
93
+ {
94
+ "role": "user",
95
+ "content": prompt
96
+ },
97
+ ]
98
+ try:
99
+ client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
100
+ response = client.chat.completions.create(
101
+ model="llama-3.1-sonar-small-128k-online",
102
+ messages=messages
103
+ )
104
+ content = response.choices[0].message.content
105
+ return content
106
+ except Exception as e:
107
+ st.error(f"Failed to fetch data from Perplexity API: {e}")
108
+ return ""
109
+
110
+ def get_new_course_id():
111
+ """Generate a new course ID by incrementing the last course ID"""
112
+ last_course = courses_collection.find_one(sort=[("course_id", -1)])
113
+ if last_course:
114
+ last_course_id = int(last_course["course_id"][2:])
115
+ new_course_id = f"CS{last_course_id + 1}"
116
+ else:
117
+ new_course_id = "CS101"
118
+ return new_course_id
119
+
120
+
121
+ def create_course(course_name, start_date, duration_weeks):
122
+ # Generate course overview
123
+ # overview_prompt = f"""Generate an overview for the undergraduate course {course_name}
124
+ # Include all relevant concepts and key topics covered in a typical curriculum.
125
+ # The response should be concise (300-400 words). Ensure that your response is in a valid JSON format."""
126
+
127
+ # overview_prompt2 = f"""Generate an overview for the undergraduate course {course_name}.
128
+ # The overview should include:
129
+ # The course title, a detailed course description,
130
+ # a division of all relevant concepts and key topics into 4-6 logical modules,
131
+ # capturing the flow and structure of a typical curriculum.
132
+ # Ensure the response adheres to the following JSON format:
133
+ # {{
134
+ # 'overview': 'string',
135
+ # 'modules': [
136
+ # {{
137
+ # 'name': 'string',
138
+ # 'description': 'string'
139
+ # }}
140
+ # ]
141
+ # }}
142
+ # overview: A detailed description of the course.
143
+ # modules: An array of 4-6 objects, each representing a logical module with a name and a brief description
144
+ # **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}"""
145
+
146
+ # course_overview = generate_perplexity_response(PERPLEXITY_API_KEY, overview_prompt2)
147
+ # # print(course_overview)
148
+ # course_overview_store = course_overview
149
+ # # print(course_overview_store)
150
+ # # Generate modules
151
+ # # modules_prompt = f"Based on this overview: {course_overview}\nCreate 4-6 logical modules for the course, each module should group related concepts and each module may include reference books if applicable"
152
+ # sub_modules_prompt = f"""Using the provided modules in the overview {course_overview_store}, generate 2-3 submodules for each module.
153
+ # Each submodule should represent a cohesive subset of the module's topics, logically organized for teaching purposes.
154
+ # Ensure the response adheres to the following JSON format:
155
+ # {
156
+ # 'modules': [
157
+ # {
158
+ # 'name': 'string',
159
+ # 'sub_modules': [
160
+ # {
161
+ # 'name': 'string',
162
+ # 'description': 'string'
163
+ # }
164
+ # ]
165
+ # }
166
+ # ]
167
+ # }
168
+ # modules: An array where each object contains the name of the module and its corresponding sub_modules.
169
+ # sub_modules: An array of 2-3 objects for each module, each having a name and a brief description."
170
+ # **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}
171
+ # """
172
+ # sub_modules = generate_perplexity_response(PERPLEXITY_API_KEY, sub_modules_prompt)
173
+
174
+ # # modules_response = generate_perplexity_response(modules_prompt)
175
+ # print(sub_modules)
176
+
177
+ # total_sessions = duration_weeks * sessions_per_week
178
+
179
+ course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name)
180
+ course_plan_json = json.loads(course_plan)
181
+
182
+ # Generate sessions for each module
183
+ all_sessions = []
184
+ for module in course_plan_json['modules']:
185
+ for sub_module in module['sub_modules']:
186
+ for topic in sub_module['topics']:
187
+ session = create_session(
188
+ title=topic,
189
+ date=start_date,
190
+ module_name=module['module_title']
191
+ )
192
+ # print(session)
193
+ all_sessions.append(session)
194
+ start_date += timedelta(days=7) # Next session after a week
195
+
196
+ # sample_sessions = [
197
+ # {'session_id': ObjectId('6767d0bbad8316ac358def25'), 'title': 'What is Generative AI?', 'date': datetime(2024, 12, 22, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
198
+ # {'session_id': ObjectId('6767d0bbad8316ac358def26'), 'title': 'History and Evolution of AI', 'date': datetime(2024, 12, 29, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
199
+ # {'session_id': ObjectId('6767d0bbad8316ac358def27'), 'title': 'Types of Generative AI (e.g., GANs, VAEs, LLMs)', 'date': datetime(2025, 1, 5, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 505626), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
200
+ # {'session_id': ObjectId('6767d0bbad8316ac358def28'), 'title': 'Overview of popular GenAI tools (e.g., ChatGPT, Claude, Google Gemini)', 'date': datetime(2025, 1, 12, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 506559), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
201
+ # {'session_id': ObjectId('6767d0bbad8316ac358def29'), 'title': 'Frameworks for building GenAI models (e.g., TensorFlow, PyTorch)', 'date': datetime(2025, 1, 19, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 506559), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
202
+ # {'session_id': ObjectId('6767d0bbad8316ac358def2a'), 'title': 'Integration with other AI technologies', 'date': datetime(2025, 1, 26, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 507612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
203
+ # {'session_id': ObjectId('6767d0bbad8316ac358def2b'), 'title': 'Text-to-text models (e.g., GPT-3, BERT)', 'date': datetime(2025, 2, 2, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 508512), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
204
+ # {'session_id': ObjectId('6767d0bbad8316ac358def2c'), 'title': 'Text generation for content creation and marketing', 'date': datetime(2025, 2, 9, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 508512), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
205
+ # {'session_id': ObjectId('6767d0bbad8316ac358def2d'), 'title': 'Chatbots and conversational interfaces', 'date': datetime(2025, 2, 16, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 509612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
206
+ # {'session_id': ObjectId('6767d0bbad8316ac358def2e'), 'title': 'Generative Adversarial Networks (GANs)', 'date': datetime(2025, 2, 23, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 509612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
207
+ # {'session_id': ObjectId('6767d0bbad8316ac358def2f'), 'title': 'Variational Autoencoders (VAEs)', 'date': datetime(2025, 3, 2, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 510612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
208
+ # {'session_id': ObjectId('6767d0bbad8316ac358def30'), 'title': 'Applications in art, design, and media', 'date': datetime(2025, 3, 9, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 511497), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
209
+ # {'session_id': ObjectId('6767d0bbad8316ac358def31'), 'title': 'Understanding prompt design principles', 'date': datetime(2025, 3, 16, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 511497), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
210
+ # {'session_id': ObjectId('6767d0bbad8316ac358def33'), 'title': 'Advanced techniques for fine-tuning models', 'date': datetime(2025, 3, 30, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 512514), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
211
+ # {'session_id': ObjectId('6767d0bbad8316ac358def34'), 'title': 'Ethical implications of AI-generated content', 'date': datetime(2025, 4, 6, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 513613), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
212
+ # {'session_id': ObjectId('6767d0bbad8316ac358def35'), 'title': 'Addressing bias in AI models', 'date': datetime(2025, 4, 13, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 514639), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
213
+ # {'session_id': ObjectId('6767d0bbad8316ac358def36'), 'title': 'Regulatory frameworks and guidelines', 'date': datetime(2025, 4, 20, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 514639), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
214
+ # {'session_id': ObjectId('6767d0bbad8316ac358def37'), 'title': 'Case studies from various industries (e.g., marketing, healthcare, finance)', 'date': datetime(2025, 4, 27, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 515610), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
215
+ # {'session_id': ObjectId('6767d0bbad8316ac358def38'), 'title': 'Success stories and challenges faced by companies using GenAI', 'date': datetime(2025, 5, 4, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 515610), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
216
+ # {'session_id': ObjectId('6767d0bbad8316ac358def39'), 'title': 'Guidelines for developing a GenAI project', 'date': datetime(2025, 5, 11, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 516614), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
217
+ # {'session_id': ObjectId('6767d0bbad8316ac358def3a'), 'title': 'Tools and resources for project implementation', 'date': datetime(2025, 5, 18, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 516614), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
218
+ # {'session_id': ObjectId('6767d0bbad8316ac358def3b'), 'title': 'Best practices for testing and deployment', 'date': datetime(2025, 5, 25, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 517563), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}}
219
+ # ]
220
+
221
+ # small_sample_sessions = [
222
+ # {'session_id': ObjectId('6767d0bbad8316ac358def25'), 'title': 'What is Generative AI?', 'date': datetime(2024, 12, 22, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
223
+ # {'session_id': ObjectId('6767d0bbad8316ac358def26'), 'title': 'History and Evolution of AI', 'date': datetime(2024, 12, 29, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
224
+ # ]
225
+
226
+
227
+ # print(all_sessions)
228
+
229
+ print("Number of sessions:", len(all_sessions))
230
+ # Create course document
231
+ # course_description = course_plan_json['course_description']
232
+ # course_doc = {
233
+ # "course_id": get_new_course_id(),
234
+ # "title": course_name,
235
+ # "description": course_description,
236
+ # "faculty": faculty_name,
237
+ # "faculty_id": faculty_id,
238
+ # "duration": f"{duration_weeks} weeks",
239
+ # "created_at": datetime.utcnow(),
240
+ # "sessions": all_sessions
241
+ # }
242
+ # try:
243
+ # courses_collection.insert_one(course_doc)
244
+ # except Exception as e:
245
+ # st.error(f"Failed to insert course data into the database: {e}")
246
+
247
+ # print(course_plan)
248
+
249
+ def create_session(title: str, date: datetime, module_name: str):
250
+ """Create a session document with pre-class, in-class, and post-class components."""
251
+ return {
252
+ "session_id": ObjectId(),
253
+ "title": title,
254
+ "date": date,
255
+ "status": "upcoming",
256
+ "created_at": datetime.utcnow(),
257
+ "pre_class": {
258
+ "resources": [],
259
+ "completion_required": True
260
+ },
261
+ "in_class": {
262
+ "quiz": [],
263
+ "polls": []
264
+ },
265
+ "post_class": {
266
+ "assignments": []
267
+ }
268
+ }
269
+
270
+ # Usage example:
271
+ if __name__ == "__main__":
272
+ create_course("Introduction to Data Analytics", datetime.now(), 2)
create_course2.py ADDED
@@ -0,0 +1,331 @@
1
+ from datetime import datetime, timedelta
2
+ import os
3
+ from typing import Dict, List, Any
4
+ from pymongo import MongoClient
5
+ import requests
6
+ import uuid
7
+ import openai
8
+ from openai import OpenAI
9
+ import streamlit as st
10
+ from bson import ObjectId
11
+ from dotenv import load_dotenv
12
+ import json
13
+
14
+ load_dotenv()
15
+ MONGODB_URI = os.getenv("MONGO_URI")
16
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
17
+ OPENAI_API_KEY = os.getenv("OPENAI_KEY")
18
+
19
+ client = MongoClient(MONGODB_URI)
20
+ db = client['novascholar_db']
21
+ courses_collection = db['courses']
22
+
23
+ def generate_perplexity_response(api_key, course_name, duration_weeks, sessions_per_week):
24
+ headers = {
25
+ "accept": "application/json",
26
+ "content-type": "application/json",
27
+ "authorization": f"Bearer {api_key}"
28
+ }
29
+
30
+ # Calculate sessions based on duration
31
+ total_sessions = duration_weeks * sessions_per_week  # Total number of sessions across the course duration
32
+
33
+ prompt = f"""
34
+ You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate a comprehensive, academically rigorous course structure for the course {course_name} that fits exactly within {duration_weeks} weeks with {total_sessions} total sessions ({sessions_per_week} sessions per week).
35
+
36
+ Please generate a detailed course structure in JSON format following these specifications:
37
+
38
+ 1. The course structure must be designed for exactly {duration_weeks} weeks with {total_sessions} total sessions
39
+ 2. Each module should contain an appropriate number of sessions that sum up to exactly {total_sessions}
40
+ 3. Each session should be designed for a 1-1.5-hour class duration
41
+ 4. Follow standard academic practices and nomenclature
42
+ 5. Ensure progressive complexity from foundational to advanced concepts
43
+ 6. The course_title should exactly match the course name provided
44
+ 7. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
45
+ 8. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
46
+
47
+ The JSON response should follow this structure:
48
+ {{
49
+ "course_title": "string",
50
+ "course_description": "string",
51
+ "total_duration_weeks": {duration_weeks},
52
+ "sessions_per_week": {sessions_per_week},
53
+ "total_sessions": {total_sessions},
54
+ "modules": [
55
+ {{
56
+ "module_title": "string",
57
+ "module_duration_sessions": number,
58
+ "sub_modules": [
59
+ {{
60
+ "title": "string",
61
+ "topics": [
62
+ {{
63
+ "title": "string",
64
+ "short_description": "string",
65
+ "concise_learning_objectives": ["string"]
66
+ }}
67
+ ]
68
+ }}
69
+ ]
70
+ }}
71
+ ]
72
+ }}
73
+
74
+ Ensure that:
75
+ 1. The sum of all module_duration_sessions equals exactly {total_sessions}
76
+ 2. Each topic has clear learning objectives
77
+ 3. Topics build upon each other logically
78
+ 4. Content is distributed evenly across the available sessions
79
+ 5. **This Instruction is Strictly followed: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.****
80
+
81
+ """
82
+
83
+ messages = [
84
+ {
85
+ "role": "system",
86
+ "content": (
87
+ "You are an expert educational AI assistant specializing in course design and curriculum planning. "
88
+ "Your task is to generate accurate, detailed, and structured educational content that precisely fits "
89
+ "the specified duration."
90
+ ),
91
+ },
92
+ {
93
+ "role": "user",
94
+ "content": prompt
95
+ },
96
+ ]
97
+
98
+ try:
99
+ client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
100
+ response = client.chat.completions.create(
101
+ model="llama-3.1-sonar-small-128k-online",
102
+ messages=messages
103
+ )
104
+ content = response.choices[0].message.content
105
+
106
+ # Validate session count
107
+ course_plan = json.loads(content)
108
+ total_planned_sessions = sum(
109
+ module.get('module_duration_sessions', 0)
110
+ for module in course_plan.get('modules', [])
111
+ )
112
+
113
+ if abs(total_planned_sessions - total_sessions) > 5:
114
+ raise ValueError(f"Generated plan has {total_planned_sessions} sessions, but {total_sessions} were requested")
115
+
116
+ return content
117
+ except Exception as e:
118
+ st.error(f"Failed to fetch data from Perplexity API: {e}")
119
+ return ""
120
+
121
+ def generate_session_resources(api_key, session_titles: List[str]):
122
+ """
123
+ Generate relevant resources for each session title separately
124
+ """
125
+ resources_prompt = f"""
126
+ You are an expert educational content curator. For each session title provided, suggest highly relevant and accurate learning resources.
127
+ Please provide resources for these sessions: {session_titles}
128
+
129
+ For each session, provide resources in this JSON format:
130
+ {{
131
+ "session_resources": [
132
+ {{
133
+ "session_title": "string",
134
+ "resources": {{
135
+ "readings": [
136
+ {{
137
+ "title": "string",
138
+ "url": "string",
139
+ "type": "string",
140
+ "estimated_read_time": "string"
141
+ }}
142
+ ],
143
+ "books": [
144
+ {{
145
+ "title": "string",
146
+ "author": "string",
147
+ "isbn": "string",
148
+ "chapters": "string"
149
+ }}
150
+ ],
151
+ "additional_resources": [
152
+ {{
153
+ "title": "string",
154
+ "url": "string",
155
+ "type": "string",
156
+ "description": "string"
157
+ }}
158
+ ]
159
+ }}
160
+ }}
161
+ ]
162
+ }}
163
+
164
+ Guidelines:
165
+ 1. Ensure all URLs are real and currently active
166
+ 2. Prioritize high-quality, authoritative sources
167
+ 3. Include 1-2 resources of each type
168
+ 4. For readings, include a mix of academic and practical resources; this section may extend to 3-4 readings
169
+ 5. Book references should be real, recently published works
170
+ 6. Additional resources can include tools, documentation, or practice platforms
171
+ 7. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
172
+ 8. ***NOTE: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
173
+ """
174
+
175
+ messages = [
176
+ {
177
+ "role": "system",
178
+ "content": "You are an expert educational content curator, focused on providing accurate and relevant learning resources.",
179
+ },
180
+ {
181
+ "role": "user",
182
+ "content": resources_prompt
183
+ },
184
+ ]
185
+
186
+ try:
187
+ client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
188
+ response = client.chat.completions.create(
189
+ model="llama-3.1-sonar-small-128k-online",
190
+ messages=messages
191
+ )
192
+ print("Response is: \n", response.choices[0].message.content)
193
+ # try:
194
+ # return json.loads(response.choices[0].message.content)
195
+ # except json.JSONDecodeError as e:
196
+ # st.error(f"Failed to decode JSON response: {e}")
197
+ # return None
198
+ return response.choices[0].message.content
199
+ except Exception as e:
200
+ st.error(f"Failed to generate resources: {e}")
201
+ return None
202
+
203
+ def validate_course_plan(course_plan):
204
+ required_fields = ['course_title', 'course_description', 'modules']
205
+ if not all(field in course_plan for field in required_fields):
206
+ raise ValueError("Invalid course plan structure")
207
+
208
+ for module in course_plan['modules']:
209
+ if 'module_title' not in module or 'sub_modules' not in module:
210
+ raise ValueError("Invalid module structure")
211
+
212
+ def create_session(title: str, date: datetime, module_name: str, resources: dict):
213
+ """Create a session document with pre-class, in-class, and post-class components."""
214
+ return {
215
+ "session_id": ObjectId(),
216
+ "title": title,
217
+ "date": date,
218
+ "status": "upcoming",
219
+ "created_at": datetime.utcnow(),
220
+ "module_name": module_name,
221
+ "pre_class": {
222
+ "resources": [],
223
+ "completion_required": True
224
+ },
225
+ "in_class": {
226
+ "quiz": [],
227
+ "polls": []
228
+ },
229
+ "post_class": {
230
+ "assignments": []
231
+ },
232
+ "external_resources": {
233
+ "readings": resources.get("readings", []),
234
+ "books": resources.get("books", []),
235
+ "additional_resources": resources.get("additional_resources", [])
236
+ }
237
+ }
238
+
239
+ def create_course(course_name: str, start_date: datetime, duration_weeks: int, sessions_per_week: int):
240
+ # First generate a course plan using Perplexity API
241
+ # course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name, duration_weeks, sessions_per_week)
242
+ # course_plan_json = json.loads(course_plan)
243
+
244
+ # print("Course Structure is: \n", course_plan_json);
245
+
246
+ # Earlier Code:
247
+ # Generate sessions for each module with resources
248
+ # all_sessions = []
249
+ # current_date = start_date
250
+
251
+ # for module in course_plan_json['modules']:
252
+ # for sub_module in module['sub_modules']:
253
+ # for topic in sub_module['topics']:
254
+ # session = create_session(
255
+ # title=topic['title'],
256
+ # date=current_date,
257
+ # module_name=module['module_title'],
258
+ # resources=topic['resources']
259
+ # )
260
+ # all_sessions.append(session)
261
+ # current_date += timedelta(days=3.5) # Spacing sessions evenly across the week
262
+
263
+ # return course_plan_json, all_sessions
264
+
265
+ # New Code:
266
+ # Extract all session titles
267
+ session_titles = []
268
+ # Load the course plan JSON
269
+ course_plan_json = {}
270
+ with open('sample_files/sample_course.json', 'r') as file:
271
+ course_plan_json = json.load(file)
272
+
273
+ for module in course_plan_json['modules']:
274
+ for sub_module in module['sub_modules']:
275
+ for topic in sub_module['topics']:
276
+ session_titles.append(topic['title'])
277
+
278
+ # Generate resources for all sessions
279
+ session_resources = generate_session_resources(PERPLEXITY_API_KEY, session_titles)
280
+ # print("Session Resources are: \n", session_resources)
281
+ resources = json.loads(session_resources)
282
+ # print("Resources JSON is: \n", resources_json)
283
+
284
+ # print("Session Resources are: \n", session_resources)
285
+
286
+ # Create a mapping of session titles to their resources
287
+
288
+ # Import Resources JSON
289
+ # resources = {}
290
+ # with open('sample_files/sample_course_resources.json', 'r') as file:
291
+ # resources = json.load(file)
292
+
293
+ resources_map = {
294
+ resource['session_title']: resource['resources']
295
+ for resource in resources['session_resources']
296
+ }
297
+ print("Resources Map is: \n", resources_map)
298
+ # print("Sample is: ", resources_map.get('Overview of ML Concepts, History, and Applications'));
299
+ # Generate sessions with their corresponding resources
300
+ all_sessions = []
301
+ current_date = start_date
302
+
303
+ for module in course_plan_json['modules']:
304
+ for sub_module in module['sub_modules']:
305
+ for topic in sub_module['topics']:
306
+ session = create_session(
307
+ title=topic['title'],
308
+ date=current_date,
309
+ module_name=module['module_title'],
310
+ resources=resources_map.get(topic['title'], {})
311
+ )
312
+ all_sessions.append(session)
313
+ current_date += timedelta(days=3.5)
314
+
315
+ print("All Sessions are: \n", all_sessions)
316
+
317
+ def get_new_course_id():
318
+ """Generate a new course ID by incrementing the last course ID"""
319
+ last_course = courses_collection.find_one(sort=[("course_id", -1)])
320
+ if last_course:
321
+ last_course_id = int(last_course["course_id"][2:])
322
+ new_course_id = f"CS{last_course_id + 1}"
323
+ else:
324
+ new_course_id = "CS101"
325
+ return new_course_id
326
+
327
+ # if __name__ == "__main__":
328
+ # course_name = "Introduction to Machine Learning"
329
+ # start_date = datetime(2022, 9, 1)
330
+ # duration_weeks = 4
331
+ # create_course(course_name, start_date, duration_weeks, 3)
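In this file, create_course attaches external resources to each session by looking the topic title up in resources_map and falling back to an empty dict. A self-contained illustration of that mapping step (the payload below is a made-up stand-in for the generate_session_resources output):

# Hypothetical, minimal stand-in for the parsed session_resources payload.
resources = {
    "session_resources": [
        {
            "session_title": "Attention Mechanism",
            "resources": {"readings": [], "books": [], "additional_resources": []},
        }
    ]
}

resources_map = {
    item["session_title"]: item["resources"] for item in resources["session_resources"]
}
print(resources_map.get("Attention Mechanism", {}))  # found -> the resources dict
print(resources_map.get("Unknown Topic", {}))        # missing -> {} fallback, as in create_course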
db.py ADDED
@@ -0,0 +1,696 @@
1
+ # Setup for MongoDB
2
+ from pymongo import MongoClient
3
+ from datetime import datetime
4
+ from werkzeug.security import generate_password_hash
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
+ load_dotenv()
9
+ MONGO_URI = os.getenv("MONGO_URI")
10
+
11
+ client = MongoClient(MONGO_URI)
12
+ try:
13
+ client.admin.command("ping")
14
+ print("MongoDB connection successful")
15
+ except Exception as e:
16
+ print(f"MongoDB connection failed: {e}")
17
+
18
+ db = client["novascholar_db"]
19
+
20
+ ########
21
+ # Research Assistant Schema
22
+ research_assistant_schema = {
23
+ "bsonType": "object",
24
+ "required": ["full_name", "password", "email", "courses_assisted"],
25
+ "properties": {
26
+ "full_name": {
27
+ "bsonType": "string",
28
+ "description": "Full name of the research assistant",
29
+ },
30
+ "password": {
31
+ "bsonType": "string",
32
+ "description": "Hashed password of the research assistant",
33
+ },
34
+ "email": {
35
+ "bsonType": "string",
36
+ "description": "Email address of the research assistant",
37
+ },
38
+ "courses_assisted": {
39
+ "bsonType": "array",
40
+ "description": "List of courses the research assistant is assisting",
41
+ "items": {
42
+ "bsonType": "object",
43
+ "required": ["course_id"],
44
+ "properties": {
45
+ "course_id": {
46
+ "bsonType": "string",
47
+ "description": "ID of the course",
48
+ }
49
+ },
50
+ },
51
+ },
52
+ },
53
+ }
54
+
55
+ # Create research assistants collection
56
+ research_assistants_collection = db["research_assistants"]
57
+
58
+ # Create indexes
59
+ research_assistants_collection.create_index("full_name", unique=True)
60
+ research_assistants_collection.create_index("email", unique=True)
61
+
62
+
63
+ # Optional: Sample data insertion function
64
+ def insert_sample_research_assistants():
65
+ sample_research_assistants = [
66
+ {
67
+ "full_name": "John Doe RA",
68
+ "password": generate_password_hash("password123"),
69
+ "email": "[email protected]",
70
+ "courses_assisted": [{"course_id": "CS101"}, {"course_id": "CS102"}],
71
+ }
72
+ ]
73
+
74
+ try:
75
+ research_assistants_collection.insert_many(sample_research_assistants)
76
+ print("Sample research assistants inserted successfully!")
77
+ except Exception as e:
78
+ print(f"Error inserting sample research assistants: {e}")
79
+
80
+
81
+ ###########
82
+
83
+ ###############
84
+ # Add after research assistant schema
85
+
86
+ # Analyst Schema
87
+ analyst_schema = {
88
+ "bsonType": "object",
89
+ "required": ["full_name", "password", "email", "courses_analyzed"],
90
+ "properties": {
91
+ "full_name": {"bsonType": "string", "description": "Full name of the analyst"},
92
+ "password": {
93
+ "bsonType": "string",
94
+ "description": "Hashed password of the analyst",
95
+ },
96
+ "email": {"bsonType": "string", "description": "Email address of the analyst"},
97
+ "courses_analyzed": {
98
+ "bsonType": "array",
99
+ "description": "List of courses the analyst is analyzing",
100
+ "items": {
101
+ "bsonType": "object",
102
+ "required": ["course_id"],
103
+ "properties": {
104
+ "course_id": {
105
+ "bsonType": "string",
106
+ "description": "ID of the course",
107
+ }
108
+ },
109
+ },
110
+ },
111
+ },
112
+ }
113
+
114
+ # Create analysts collection
115
+ analysts_collection = db["analysts"]
116
+
117
+ # Create indexes for analysts
118
+ analysts_collection.create_index("full_name", unique=True)
119
+ analysts_collection.create_index("email", unique=True)
120
+
121
+
122
+ def insert_sample_analysts():
123
+ sample_analysts = [
124
+ {
125
+ "full_name": "jane",
126
+ "password": generate_password_hash("jane"),
127
+ "email": "[email protected]",
128
+ "courses_analyzed": [{"course_id": "CS101"}, {"course_id": "CS102"}],
129
+ }
130
+ ]
131
+
132
+ try:
133
+ analysts_collection.insert_many(sample_analysts)
134
+ print("Sample analysts inserted successfully!")
135
+ except Exception as e:
136
+ print(f"Error inserting sample analysts: {e}")
137
+
138
+
139
+ ##############@
140
+
141
+
142
+ # Define the course schema
143
+ course_schema = {
144
+ "bsonType": "object",
145
+ "required": [
146
+ "course_id",
147
+ "title",
148
+ "description",
149
+ "faculty",
150
+ "faculty_id",
151
+ "duration",
152
+ "created_at",
153
+ ],
154
+ "properties": {
155
+ "course_id": {
156
+ "bsonType": "string",
157
+ "description": "Unique identifier for the course",
158
+ },
159
+ "title": {"bsonType": "string", "description": "Title of the course"},
160
+ "description": {
161
+ "bsonType": "string",
162
+ "description": "Description of the course",
163
+ },
164
+ "faculty": {"bsonType": "string", "description": "Name of the faculty"},
165
+ "duration": {"bsonType": "string", "description": "Duration of the course"},
166
+ "created_at": {
167
+ "bsonType": "date",
168
+ "description": "Date when the course was created",
169
+ },
170
+ "sessions": {
171
+ "bsonType": "array",
172
+ "description": "List of sessions associated with the course",
173
+ "items": {
174
+ "bsonType": "object",
175
+ "required": ["session_id", "title", "date", "status", "created_at"],
176
+ "properties": {
177
+ "session_id": {
178
+ "bsonType": "string",
179
+ "description": "Unique identifier for the session",
180
+ },
181
+ "title": {
182
+ "bsonType": "string",
183
+ "description": "Title of the session",
184
+ },
185
+ "date": {"bsonType": "date", "description": "Date of the session"},
186
+ "status": {
187
+ "bsonType": "string",
188
+ "description": "Status of the session (e.g., completed, upcoming)",
189
+ },
190
+ "created_at": {
191
+ "bsonType": "date",
192
+ "description": "Date when the session was created",
193
+ },
194
+ "pre_class": {
195
+ "bsonType": "object",
196
+ "description": "Pre-class segment data",
197
+ "properties": {
198
+ "resources": {
199
+ "bsonType": "array",
200
+ "description": "List of pre-class resources",
201
+ "items": {
202
+ "bsonType": "object",
203
+ "required": ["type", "title", "url"],
204
+ "properties": {
205
+ "type": {
206
+ "bsonType": "string",
207
+ "description": "Type of resource (e.g., pdf, video)",
208
+ },
209
+ "title": {
210
+ "bsonType": "string",
211
+ "description": "Title of the resource",
212
+ },
213
+ "url": {
214
+ "bsonType": "string",
215
+ "description": "URL of the resource",
216
+ },
217
+ "vector": {
218
+ "bsonType": "array",
219
+ "description": "Vector representation of the resource",
220
+ "items": {"bsonType": "double"},
221
+ },
222
+ },
223
+ },
224
+ },
225
+ "completion_required": {
226
+ "bsonType": "bool",
227
+ "description": "Indicates if completion of pre-class resources is required",
228
+ },
229
+ },
230
+ },
231
+ "in_class": {
232
+ "bsonType": "object",
233
+ "description": "In-class segment data",
234
+ "properties": {
235
+ "topics": {
236
+ "bsonType": "array",
237
+ "description": "List of topics covered in the session",
238
+ "items": {"bsonType": "string"},
239
+ },
240
+ "quiz": {
241
+ "bsonType": "object",
242
+ "description": "Quiz data",
243
+ "properties": {
244
+ "title": {
245
+ "bsonType": "string",
246
+ "description": "Title of the quiz",
247
+ },
248
+ "questions": {
249
+ "bsonType": "int",
250
+ "description": "Number of questions in the quiz",
251
+ },
252
+ "duration": {
253
+ "bsonType": "int",
254
+ "description": "Duration of the quiz in minutes",
255
+ },
256
+ },
257
+ },
258
+ "polls": {
259
+ "bsonType": "array",
260
+ "description": "List of polls conducted during the session",
261
+ "items": {
262
+ "bsonType": "object",
263
+ "required": ["question", "options"],
264
+ "properties": {
265
+ "question": {
266
+ "bsonType": "string",
267
+ "description": "Poll question",
268
+ },
269
+ "options": {
270
+ "bsonType": "array",
271
+ "description": "List of poll options",
272
+ "items": {"bsonType": "string"},
273
+ },
274
+ "responses": {
275
+ "bsonType": "object",
276
+ "description": "Responses to the poll",
277
+ "additionalProperties": {"bsonType": "int"},
278
+ },
279
+ },
280
+ },
281
+ },
282
+ },
283
+ },
284
+ "post_class": {
285
+ "bsonType": "object",
286
+ "description": "Post-class segment data",
287
+ "properties": {
288
+ "assignments": {
289
+ "bsonType": "array",
290
+ "description": "List of assignments",
291
+ "items": {
292
+ "bsonType": "object",
293
+ "required": ["id", "title", "due_date", "status"],
294
+ "properties": {
295
+ "id": {
296
+ "bsonType": "int",
297
+ "description": "Assignment ID",
298
+ },
299
+ "title": {
300
+ "bsonType": "string",
301
+ "description": "Title of the assignment",
302
+ },
303
+ "due_date": {
304
+ "bsonType": "date",
305
+ "description": "Due date of the assignment",
306
+ },
307
+ "status": {
308
+ "bsonType": "string",
309
+ "description": "Status of the assignment (e.g., pending, completed)",
310
+ },
311
+ "submissions": {
312
+ "bsonType": "array",
313
+ "description": "List of submissions",
314
+ "items": {
315
+ "bsonType": "object",
316
+ "required": [
317
+ "student_id",
318
+ "file_url",
319
+ "submitted_at",
320
+ ],
321
+ "properties": {
322
+ "student_id": {
323
+ "bsonType": "string",
324
+ "description": "ID of the student who submitted the assignment",
325
+ },
326
+ "file_url": {
327
+ "bsonType": "string",
328
+ "description": "URL of the submitted file",
329
+ },
330
+ "submitted_at": {
331
+ "bsonType": "date",
332
+ "description": "Date when the assignment was submitted",
333
+ },
334
+ },
335
+ },
336
+ },
337
+ },
338
+ },
339
+ }
340
+ },
341
+ },
342
+ },
343
+ },
344
+ },
345
+ },
346
+ }
347
+
348
+ # Create the collection with the schema
349
+ # db.create_collection("courses_collection2", validator={"$jsonSchema": course_schema})
350
+
351
+ # sample_course = {
352
+ # "course_id": "CS101",
353
+ # "title": "Introduction to Computer Science",
354
+ # "description": "This course covers the basics of computer science and programming.",
355
+ # "faculty": "Dr. John Doe",
356
+ # "faculty_id": "F101",
357
+ # "duration": "10 weeks",
358
+ # "created_at": datetime.utcnow(),
359
+ # "sessions": [
360
+ # {
361
+ # "session_id": "S101",
362
+ # "title": "Introduction to Programming Fundamentals",
363
+ # "date": datetime.utcnow() - timedelta(days=7),
364
+ # "status": "completed",
365
+ # "created_at": datetime.utcnow() - timedelta(days=7),
366
+ # "pre_class": {
367
+ # "resources": [
368
+ # {
369
+ # "type": "pdf",
370
+ # "title": "Introduction to Python Basics",
371
+ # "url": "/assets/python_basics.pdf",
372
+ # "vector": [0.1, 0.2, 0.3] # Example vector
373
+ # }
374
+ # ],
375
+ # "completion_required": True
376
+ # },
377
+ # "in_class": {
378
+ # "topics": ["Variables", "Data Types", "Basic Operations"],
379
+ # "quiz": {
380
+ # "title": "Python Basics Quiz",
381
+ # "questions": 5,
382
+ # "duration": 15
383
+ # },
384
+ # "polls": [
385
+ # {
386
+ # "question": "How comfortable are you with Python syntax?",
387
+ # "options": ["Very", "Somewhat", "Not at all"],
388
+ # "responses": {"Very": 10, "Somewhat": 5, "Not at all": 2}
389
+ # }
390
+ # ]
391
+ # },
392
+ # "post_class": {
393
+ # "assignments": [
394
+ # {
395
+ # "id": 1,
396
+ # "title": "Basic Python Programs",
397
+ # "due_date": datetime.utcnow() + timedelta(days=2),
398
+ # "status": "pending",
399
+ # "submissions": []
400
+ # }
401
+ # ]
402
+ # }
403
+ # },
404
+ # {
405
+ # "session_id": "S102",
406
+ # "title": "Control Flow and Functions",
407
+ # "date": datetime.utcnow() - timedelta(days=3),
408
+ # "status": "completed",
409
+ # "created_at": datetime.utcnow() - timedelta(days=3),
410
+ # "pre_class": {
411
+ # "resources": [
412
+ # {
413
+ # "type": "pdf",
414
+ # "title": "Control Flow in Python",
415
+ # "url": "/assets/control_flow.pdf",
416
+ # "vector": [0.4, 0.5, 0.6] # Example vector
417
+ # }
418
+ # ],
419
+ # "completion_required": True
420
+ # },
421
+ # "in_class": {
422
+ # "topics": ["If-else statements", "Loops", "Function definitions"],
423
+ # "quiz": {
424
+ # "title": "Control Flow Quiz",
425
+ # "questions": 8,
426
+ # "duration": 20
427
+ # },
428
+ # "polls": [
429
+ # {
430
+ # "question": "Which loop type do you find more intuitive?",
431
+ # "options": ["For loops", "While loops", "Both"],
432
+ # "responses": {"For loops": 12, "While loops": 8, "Both": 10}
433
+ # }
434
+ # ]
435
+ # },
436
+ # "post_class": {
437
+ # "assignments": [
438
+ # {
439
+ # "id": 2,
440
+ # "title": "Function Implementation Exercise",
441
+ # "due_date": datetime.utcnow() + timedelta(days=4),
442
+ # "status": "pending",
443
+ # "submissions": []
444
+ # }
445
+ # ]
446
+ # }
447
+ # }
448
+ # ]
449
+ # }
450
+ courses_collection2 = db["courses_collection2"]
451
+
452
+
453
+ # Define the users schema
454
+ users_schema = {
455
+ "bsonType": "object",
456
+ "required": ["user_id", "username", "password", "role", "created_at"],
457
+ "properties": {
458
+ "user_id": {
459
+ "bsonType": "string",
460
+ "description": "Unique identifier for the user",
461
+ },
462
+ "username": {"bsonType": "string", "description": "Name of the User"},
463
+ "password": {"bsonType": "string", "description": "Password of the user"},
464
+ "role": {
465
+ "bsonType": "string",
466
+ "description": "Type of user (e.g., student, faculty)",
467
+ },
468
+ "created_at": {
469
+ "bsonType": "date",
470
+ "description": "Date when the user was created",
471
+ },
472
+ },
473
+ }
474
+ # Create the collection with the schema
475
+ # db.create_collection("users", validator={"$jsonSchema": users_schema})
476
+ users_collection = db["users"]
477
+
478
+
479
+ # Defining the Student Collection
480
+ student_schema = {
481
+ "bsonType": "object",
482
+ "required": ["SID", "full_name", "password", "enrolled_courses", "created_at"],
483
+ "properties": {
484
+ "SID": {
485
+ "bsonType": "string",
486
+ "description": "Unique identifier for the student",
487
+ },
488
+ "full_name": {"bsonType": "string", "description": "Full name of the student"},
489
+ "password": {
490
+ "bsonType": "string",
491
+ "description": "Hashed password of the student",
492
+ },
493
+ "enrolled_courses": {
494
+ "bsonType": "array",
495
+ "description": "List of courses the student is enrolled in",
496
+ "items": {
497
+ "bsonType": "object",
498
+ "required": ["course_id", "title"],
499
+ "properties": {
500
+ "course_id": {
501
+ "bsonType": "string",
502
+ "description": "Unique identifier for the course",
503
+ },
504
+ "title": {
505
+ "bsonType": "string",
506
+ "description": "Title of the course",
507
+ },
508
+ },
509
+ },
510
+ },
511
+ "created_at": {
512
+ "bsonType": "date",
513
+ "description": "Date when the student was created",
514
+ },
515
+ },
516
+ }
517
+ # Defining the Faculty Collection
518
+ faculty_schema = {
519
+ "bsonType": "object",
520
+ "required": ["TID", "full_name", "password", "courses_taught", "created_at"],
521
+ "properties": {
522
+ "TID": {
523
+ "bsonType": "string",
524
+ "description": "Unique identifier for the faculty",
525
+ },
526
+ "full_name": {"bsonType": "string", "description": "Full name of the faculty"},
527
+ "password": {
528
+ "bsonType": "string",
529
+ "description": "Hashed password of the faculty",
530
+ },
531
+ "courses_taught": {
532
+ "bsonType": "array",
533
+ "description": "List of courses the faculty is teaching",
534
+ "items": {
535
+ "bsonType": "object",
536
+ "required": ["course_id", "title"],
537
+ "properties": {
538
+ "course_id": {
539
+ "bsonType": "string",
540
+ "description": "Unique identifier for the course",
541
+ },
542
+ "title": {
543
+ "bsonType": "string",
544
+ "description": "Title of the course",
545
+ },
546
+ },
547
+ },
548
+ },
549
+ "created_at": {
550
+ "bsonType": "date",
551
+ "description": "Date when the faculty was created",
552
+ },
553
+ },
554
+ }
555
+ # Creating the Collections
556
+ # db.create_collection("students", validator={"$jsonSchema": student_schema})
557
+ # db.create_collection("faculty", validator={"$jsonSchema": faculty_schema})
558
+
559
+ students_collection = db["students"]
560
+ faculty_collection = db["faculty"]
561
+
562
+ # Defining the Vector Collection Schema
563
+ vector_schema = {
564
+ "bsonType": "object",
565
+ "required": ["resource_id", "vector"],
566
+ "properties": {
567
+ "resource_id": {
568
+ "bsonType": "objectId",
569
+ "description": "Unique identifier for the resource",
570
+ },
571
+ "vector": {
572
+ "bsonType": "array",
573
+ "description": "Vector representation of the resource",
574
+ "items": {"bsonType": "double"},
575
+ },
576
+ "text": {"bsonType": "string", "description": "Text content of the resource"},
577
+ "created_at": {
578
+ "bsonType": "date",
579
+ "description": "Date when the vector was created",
580
+ },
581
+ },
582
+ }
583
+ # Creating the Vector Collection
584
+ # db.create_collection("vectors", validator={"$jsonSchema": vector_schema})
585
+ vectors_collection = db["vectors"]
586
+
587
+
588
+ # Creating a Chat-History Collection
590
+ chat_history_schema = {
591
+ "bsonType": "object",
592
+ "required": ["user_id", "session_id", "messages", "timestamp"],
593
+ "properties": {
594
+ "user_id": {
595
+ "bsonType": "objectId",
596
+ "description": "Unique identifier for the user",
597
+ },
598
+ "session_id": {
599
+ "bsonType": "string",
600
+ "description": "Identifier for the session",
601
+ },
602
+ "timestamp": {
603
+ "bsonType": "date",
604
+ "description": "Timestamp when the chat session started",
605
+ },
606
+ "messages": {
607
+ "bsonType": "array",
608
+ "description": "List of chat messages",
609
+ "items": {
610
+ "bsonType": "object",
611
+ "properties": {
612
+ "prompt": {
613
+ "bsonType": "string",
614
+ "description": "User's question or prompt",
615
+ },
616
+ "response": {
617
+ "bsonType": "string",
618
+ "description": "Assistant's response",
619
+ },
620
+ "timestamp": {
621
+ "bsonType": "date",
622
+ "description": "Timestamp of the message",
623
+ },
624
+ },
625
+ },
626
+ },
627
+ },
628
+ }
629
+
630
+ # Create the collection with the schema
631
+ # db.create_collection("chat_history", validator={"$jsonSchema": chat_history_schema})
632
+ chat_history_collection = db["chat_history"]
633
+
634
+
635
+ # Database setup for Research Assistant
636
+ # Research Assistant Schema
637
+ research_assistant_schema = {
638
+ "bsonType": "object",
639
+ "required": ["full_name", "password", "email", "courses_assisted"],
640
+ "properties": {
641
+ "full_name": {
642
+ "bsonType": "string",
643
+ "description": "Full name of the research assistant",
644
+ },
645
+ "password": {
646
+ "bsonType": "string",
647
+ "description": "Hashed password of the research assistant",
648
+ },
649
+ "email": {
650
+ "bsonType": "string",
651
+ "description": "Email address of the research assistant",
652
+ },
653
+ "courses_assisted": {
654
+ "bsonType": "array",
655
+ "description": "List of courses the research assistant is assisting",
656
+ "items": {
657
+ "bsonType": "object",
658
+ "required": ["course_id"],
659
+ "properties": {
660
+ "course_id": {
661
+ "bsonType": "string",
662
+ "description": "ID of the course",
663
+ }
664
+ },
665
+ },
666
+ },
667
+ },
668
+ }
669
+
670
+ # Create research assistants collection
671
+ research_assistants_collection = db["research_assistants"]
672
+
673
+ # Create indexes
674
+ research_assistants_collection.create_index("full_name", unique=True)
675
+ research_assistants_collection.create_index("email", unique=True)
676
+
677
+
678
+ # Optional: Sample data insertion function
679
+ # def insert_sample_research_assistants():
680
+ # sample_research_assistants = [
681
+ # {
682
+ # "full_name": "John Doe RA",
683
+ # "password": generate_password_hash("password123"),
684
+ # "email": "[email protected]",
685
+ # "courses_assisted": [{"course_id": "CS101"}, {"course_id": "CS102"}],
686
+ # }
687
+ # ]
688
+
689
+ # try:
690
+ # research_assistants_collection.insert_many(sample_research_assistants)
691
+ # print("Sample research assistants inserted successfully!")
692
+ # except Exception as e:
693
+ # print(f"Error inserting sample research assistants: {e}")
694
+
695
+ # if __name__ == "__main__":
696
+ #     insert_sample_research_assistants()
entire_download.py ADDED
@@ -0,0 +1,90 @@
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from pymongo import MongoClient
4
+ from dotenv import load_dotenv
5
+ import os
6
+
7
+ # 1. Load environment variables
8
+ load_dotenv()
9
+ MONGODB_URI = os.getenv(
10
+ "MONGODB_URI",
11
+ "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
12
+ )
13
+
14
+ # 2. Create MongoDB connection
15
+ client = MongoClient(MONGODB_URI)
16
+ db = client["novascholar_db"]
17
+ collection = db["research_papers"]
18
+
19
+
20
+ def get_collection_data(paper_type: str):
21
+ """
22
+ Fetch all documents from the specified collection based on paper type.
23
+ """
24
+ try:
25
+ # Determine collection name based on paper type
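+ # e.g. "Review Based Paper" -> "review_based_paper"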
26
+ collection_name = paper_type.replace(" ", "_").lower()
27
+ doc_collection = db[collection_name]
28
+
29
+ # Get all documents
30
+ docs = list(doc_collection.find())
31
+
32
+ # Convert ObjectId to string
33
+ for doc in docs:
34
+ doc["_id"] = str(doc["_id"])
35
+
36
+ return docs
37
+ except Exception as e:
38
+ st.error(f"Database Error: {str(e)}")
39
+ return None
40
+
41
+
42
+ def main():
43
+ st.title("MongoDB Collection Download")
44
+ st.write("Download all documents from the selected research paper collection")
45
+
46
+ # Dropdown to select the type of research paper
47
+ paper_type = st.selectbox(
48
+ "Select type of research paper:",
49
+ [
50
+ "Review Based Paper",
51
+ "Opinion/Perspective Based Paper",
52
+ "Empirical Research Paper",
53
+ "Research Paper (Other)",
54
+ ],
55
+ )
56
+
57
+ if st.button("Fetch Data"):
58
+ with st.spinner("Retrieving documents from MongoDB..."):
59
+ docs = get_collection_data(paper_type)
60
+
61
+ if docs:
62
+ # Convert to DataFrame
63
+ df = pd.DataFrame(docs)
64
+ # Convert lists to comma-separated strings for consistency
65
+ for col in df.columns:
66
+ if df[col].apply(lambda x: isinstance(x, list)).any():
67
+ df[col] = df[col].apply(
68
+ lambda x: (
69
+ ", ".join(map(str, x)) if isinstance(x, list) else x
70
+ )
71
+ )
72
+ st.success(
73
+ f"Successfully retrieved {len(df)} documents from '{paper_type}' collection."
74
+ )
75
+ st.dataframe(df)
76
+
77
+ # Provide option to download the data as CSV
78
+ csv = df.to_csv(index=False).encode("utf-8")
79
+ st.download_button(
80
+ label="Download CSV",
81
+ data=csv,
82
+ file_name=f"{paper_type.replace(' ', '_').lower()}_papers.csv",
83
+ mime="text/csv",
84
+ )
85
+ else:
86
+ st.warning(f"No documents found in the '{paper_type}' collection.")
87
+
88
+
89
+ if __name__ == "__main__":
90
+ main()
extract.py ADDED
@@ -0,0 +1,140 @@
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import PyPDF2
4
+ import io
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import requests
8
+ import time
9
+
10
+ # Load environment variables
11
+ load_dotenv()
12
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
13
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
14
+
15
+ def call_perplexity_api(prompt: str) -> str:
16
+ """Call Perplexity AI with a prompt, return the text response if successful."""
17
+ headers = {
18
+ "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
19
+ "Content-Type": "application/json",
20
+ }
21
+
22
+ payload = {
23
+ "model": "llama-3.1-sonar-small-128k-chat",
24
+ "messages": [{"role": "user", "content": prompt}],
25
+ "temperature": 0.3,
26
+ }
27
+
28
+ try:
29
+ response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
30
+ response.raise_for_status()
31
+ return response.json()["choices"][0]["message"]["content"]
32
+ except Exception as e:
33
+ st.error(f"API Error: {str(e)}")
34
+ return ""
35
+
36
+ def extract_text_from_pdf(pdf_file):
37
+ """Extract text content from a PDF file."""
38
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
39
+ text = ""
40
+ for page in pdf_reader.pages:
41
+ text += page.extract_text() + "\n"
42
+ return text
43
+
44
+ def analyze_paper(text: str, category: str) -> str:
45
+ """Generate a prompt and get analysis for a specific category."""
46
+ prompts = {
47
+ "Summarized Abstract": "Extract and summarize the abstract from this research paper:",
48
+ "Results": "What are the main results and findings from this research paper:",
49
+ "Summarized Introduction": "Summarize the introduction section of this research paper:",
50
+ "Methods Used": "What are the main methods and methodologies used in this research:",
51
+ "Literature Survey": "Summarize the literature review or related work from this paper:",
52
+ "Limitations": "What are the limitations mentioned in this research:",
53
+ "Contributions": "What are the main contributions of this research:",
54
+ "Practical Implications": "What are the practical implications of this research:",
55
+ "Objectives": "What are the main objectives of this research:",
56
+ "Findings": "What are the key findings from this research:",
57
+ "Future Research": "What future research directions are suggested in this paper:",
58
+ "Dependent Variables": "What are the dependent variables studied in this research:",
59
+ "Independent Variables": "What are the independent variables studied in this research:",
60
+ "Dataset": "What dataset(s) were used in this research:",
61
+ "Problem Statement": "What is the main problem statement or research question:",
62
+ "Challenges": "What challenges were faced or addressed in this research:",
63
+ "Applications": "What are the potential applications of this research:"
64
+ }
65
+
66
+ prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}" # Limit text to avoid token limits
67
+ return call_perplexity_api(prompt)
68
+
69
+ def main():
70
+ st.title("Research Paper Analysis Tool")
71
+
72
+ # File uploader
73
+ uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)
74
+
75
+ if uploaded_files:
76
+ if st.button("Process Papers"):
77
+ # Initialize progress bar
78
+ progress_bar = st.progress(0)
79
+ status_text = st.empty()
80
+
81
+ # Initialize results dictionary
82
+ results = []
83
+
84
+ # Define categories
85
+ categories = [
86
+ "Summarized Abstract", "Results", "Summarized Introduction",
87
+ "Methods Used", "Literature Survey", "Limitations",
88
+ "Contributions", "Practical Implications", "Objectives",
89
+ "Findings", "Future Research", "Dependent Variables",
90
+ "Independent Variables", "Dataset", "Problem Statement",
91
+ "Challenges", "Applications"
92
+ ]
93
+
94
+ # Process each file
95
+ for i, file in enumerate(uploaded_files):
96
+ status_text.text(f"Processing {file.name}...")
97
+
98
+ # Extract text from PDF
99
+ text = extract_text_from_pdf(file)
100
+
101
+ # Initialize paper results
102
+ paper_results = {"Filename": file.name}
103
+
104
+ # Analyze each category
105
+ for j, category in enumerate(categories):
106
+ status_text.text(f"Processing {file.name} - {category}")
107
+ paper_results[category] = analyze_paper(text, category)
108
+
109
+ # Update progress
110
+ progress = (i * len(categories) + j + 1) / (len(uploaded_files) * len(categories))
111
+ progress_bar.progress(progress)
112
+
113
+ # Add small delay to avoid API rate limits
114
+ time.sleep(1)
115
+
116
+ results.append(paper_results)
117
+
118
+ # Create DataFrame
119
+ df = pd.DataFrame(results)
120
+
121
+ # Convert DataFrame to CSV
122
+ csv = df.to_csv(index=False)
123
+
124
+ # Create download button
125
+ st.download_button(
126
+ label="Download Results as CSV",
127
+ data=csv,
128
+ file_name="research_papers_analysis.csv",
129
+ mime="text/csv"
130
+ )
131
+
132
+ # Display results in the app
133
+ st.subheader("Analysis Results")
134
+ st.dataframe(df)
135
+
136
+ status_text.text("Processing complete!")
137
+ progress_bar.progress(1.0)
138
+
139
+ if __name__ == "__main__":
140
+ main()
file_upload_vectorize.py ADDED
@@ -0,0 +1,179 @@
1
+ from pymongo import MongoClient
2
+ from datetime import datetime
3
+ import openai
4
+ import google.generativeai as genai
5
+ import streamlit as st
6
+ from db import courses_collection2, faculty_collection, students_collection, vectors_collection
7
+ from PIL import Image
8
+ import PyPDF2, docx, io
9
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
10
+ from bson import ObjectId
11
+ from dotenv import load_dotenv
12
+ import os
13
+ from create_course import courses_collection
14
+
15
+ load_dotenv()
16
+ MONGO_URI = os.getenv('MONGO_URI')
17
+ OPENAI_KEY = os.getenv('OPENAI_KEY')
18
+ GEMINI_KEY = os.getenv('GEMINI_KEY')
19
+
20
+
21
+ client = MongoClient(MONGO_URI)
22
+ db = client['novascholar_db']
23
+ resources_collection = db['resources']
24
+
25
+ # Configure APIs
26
+ openai.api_key = OPENAI_KEY
27
+ genai.configure(api_key=GEMINI_KEY)
28
+ model = genai.GenerativeModel('gemini-pro')
29
+
30
+ def upload_resource(course_id, session_id, file_name, file_content, material_type):
31
+ # material_data = {
32
+ # "session_id": session_id,
33
+ # "course_id": course_id,
34
+ # "file_name": file_name,
35
+ # "file_content": file_content,
36
+ # "material_type": material_type,
37
+ # "uploaded_at": datetime.utcnow()
38
+ # }
39
+ # return resources_collection.insert_one(material_data)
40
+ # resource_id = ObjectId()
41
+
42
+ # Extract text content from the file
43
+ text_content = extract_text_from_file(file_content)
44
+
45
+ # Check if a resource with this file name already exists
46
+ existing_resource = resources_collection.find_one({
47
+ "session_id": session_id,
48
+ "file_name": file_name
49
+ })
50
+
51
+ if existing_resource:
52
+ return existing_resource["_id"]
53
+
54
+ # Read the file content
55
+ file_content.seek(0) # Reset the file pointer to the beginning
56
+ original_file_content = file_content.read()
57
+
58
+
59
+ resource_data = {
60
+ "_id": ObjectId(),
61
+ "course_id": course_id,
62
+ "session_id": session_id,
63
+ "file_name": file_name,
64
+ "file_type": file_content.type,
65
+ "text_content": text_content,
66
+ "file_content": original_file_content, # Store the original file content
67
+ "material_type": material_type,
68
+ "uploaded_at": datetime.utcnow()
69
+ }
70
+
71
+ resources_collection.insert_one(resource_data)
72
+ resource_id = resource_data["_id"]
73
+
74
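+ # Link the uploaded resource to its session by pushing the new ObjectId into pre_class.resources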
+ courses_collection.update_one(
75
+ {
76
+ "course_id": course_id,
77
+ "sessions.session_id": session_id
78
+ },
79
+ {
80
+ "$push": {"sessions.$.pre_class.resources": resource_id}
81
+ }
82
+ )
83
+ # print("End of Upload Resource, Resource ID is: ", resource_id)
84
+ # return resource_id
85
+ if text_content:
86
+ create_vector_store(text_content, resource_id)
87
+ return resource_id
88
+
89
+ def assignment_submit(student_id, course_id, session_id, assignment_id, file_name, file_content, text_content, material_type):
90
+ # Read the file content
91
+ file_content.seek(0) # Reset the file pointer to the beginning
92
+ original_file_content = file_content.read()
93
+
94
+ assignment_data = {
95
+ "student_id": student_id,
96
+ "course_id": course_id,
97
+ "session_id": session_id,
98
+ "assignment_id": assignment_id,
99
+ "file_name": file_name,
100
+ "file_type": file_content.type,
101
+ "file_content": original_file_content, # Store the original file content
102
+ "text_content": text_content,
103
+ "material_type": material_type,
104
+ "submitted_at": datetime.utcnow(),
105
+ "file_url": "sample_url"
106
+ }
107
+ try:
108
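+ # array_filters with the $[assignment] positional operator push the submission into the matching assignment's submissions array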
+ courses_collection2.update_one(
109
+ {
110
+ "course_id": course_id,
111
+ "sessions.session_id": session_id,
112
+ "sessions.post_class.assignments.id": assignment_id
113
+ },
114
+ {
115
+ "$push": {"sessions.$.post_class.assignments.$[assignment].submissions": assignment_data}
116
+ },
117
+ array_filters=[{"assignment.id": assignment_id}]
118
+ )
119
+ return True
120
+ except Exception as db_error:
121
+ print(f"Error saving submission: {str(db_error)}")
122
+ return False
123
+
124
+ def extract_text_from_file(uploaded_file):
125
+ text = ""
126
+ file_type = uploaded_file.type
127
+
128
+ try:
129
+ if file_type == "text/plain":
130
+ text = uploaded_file.getvalue().decode("utf-8")
131
+ elif file_type == "application/pdf":
132
+ pdf_reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file.getvalue()))
133
+ for page in pdf_reader.pages:
134
+ text += page.extract_text() + "\n"
135
+ elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
136
+ doc = docx.Document(io.BytesIO(uploaded_file.getvalue()))
137
+ for para in doc.paragraphs:
138
+ text += para.text + "\n"
139
+ return text
140
+ except Exception as e:
141
+ st.error(f"Error processing file: {str(e)}")
142
+ return None
143
+
144
+ def get_embedding(text):
145
+ response = openai.embeddings.create(
146
+ model="text-embedding-ada-002",
147
+ input=text
148
+ )
149
+ return response.data[0].embedding
150
+
151
+ def create_vector_store(text, resource_id):
152
+ # resource_object_id = ObjectId(resource_id)
153
+ # Ensure resource_id is an ObjectId
154
+ # if not isinstance(resource_id, ObjectId):
155
+ # resource_id = ObjectId(resource_id)
156
+
157
+ existing_vector = vectors_collection.find_one({
158
+ "resource_id": resource_id,
159
+ "text": text
160
+ })
161
+
162
+ if existing_vector:
163
+ print(f"Vector already exists for Resource ID: {resource_id}")
164
+ return
165
+
166
+ print(f"In Vector Store method, Resource ID is: {resource_id}")
167
+ document = Document(text=text)
168
+ embedding = get_embedding(text)
169
+
170
+ vector_data = {
171
+ "resource_id": resource_id,
172
+ "vector": embedding,
173
+ "text": text,
174
+ "created_at": datetime.utcnow()
175
+ }
176
+
177
+ vectors_collection.insert_one(vector_data)
178
+
179
+ # return VectorStoreIndex.from_documents([document])
gen_mcqs.py ADDED
@@ -0,0 +1,206 @@
1
+ import ast
2
+ from pymongo import MongoClient
3
+ from datetime import datetime
4
+ import openai
5
+ import google.generativeai as genai
6
+ from google.generativeai import GenerativeModel
7
+ from dotenv import load_dotenv
8
+ import os
9
+ from file_upload_vectorize import resources_collection, vectors_collection, courses_collection2, faculty_collection
10
+
11
+ # Load environment variables
12
+ load_dotenv()
13
+ MONGO_URI = os.getenv('MONGO_URI')
14
+ OPENAI_KEY = os.getenv('OPENAI_KEY')
15
+ GEMINI_KEY = os.getenv('GEMINI_KEY')
16
+
17
+ # Configure APIs
18
+ openai.api_key = OPENAI_KEY
19
+ genai.configure(api_key=GEMINI_KEY)
20
+ model = genai.GenerativeModel('gemini-pro')
21
+
22
+ # Connect to MongoDB
23
+ client = MongoClient(MONGO_URI)
24
+ db = client['novascholar_db']
25
+ quizzes_collection = db["quizzes"]
26
+
27
+ def strip_code_markers(response_text):
28
+ """Strip off the markers ``` and python from a LLM model's response"""
29
+ if response_text.startswith("```python"):
30
+ response_text = response_text[len("```python"):].strip()
31
+ if response_text.startswith("```"):
32
+ response_text = response_text[len("```"):].strip()
33
+ if response_text.endswith("```"):
34
+ response_text = response_text[:-len("```")].strip()
35
+ return response_text
36
+
37
+
38
+ # New function to generate MCQs using Gemini
39
+ def generate_mcqs(context, num_questions, session_title, session_description):
40
+ """Generate MCQs either from context or session details"""
41
+ try:
42
+ # Initialize Gemini model
43
+ if context:
44
+ prompt = f"""
45
+ Based on the following content, generate {num_questions} multiple choice questions.
46
+ Format each question as a Python dictionary with the following structure:
47
+ {{
48
+ "question": "Question text here",
49
+ "options": ["A) option1", "B) option2", "C) option3", "D) option4"],
50
+ "correct_option": "A) option1" or "B) option2" or "C) option3" or "D) option4"
51
+ }}
52
+
53
+ Content:
54
+ {context}
55
+
56
+ Generate challenging but clear questions that test understanding of key concepts.
57
+ Return only the Python list of dictionaries.
58
+ """
59
+ else:
60
+ prompt = f"""
61
+ Generate {num_questions} multiple choice questions about the topic:
62
+ Title: {session_title}
63
+ Description: {session_description}
64
+
65
+ Format each question as a Python dictionary with the following structure:
66
+ {{
67
+ "question": "Question text here",
68
+ "options": ["A) option1", "B) option2", "C) option3", "D) option4"],
69
+ "correct_option": "A" or "B" or "C" or "D"
70
+ }}
71
+
72
+ Generate challenging but clear questions.
73
+ Return only the Python list of dictionaries without any additional formatting or markers
74
+ Do not write any other text, do not start the response with (```python), do not end the response with backticks(```)
75
+ A Sample response should look like this: Response Text: [
76
+ {{
77
+ "question": "Which of the following is NOT a valid data type in C++?",
78
+ "options": ["int", "double", "boolean", "char"],
79
+ "correct_option": "C"
80
+ }}
81
+ ] (Notice that there are no backticks(```) around the response and no (```python))
82
+ .
83
+ """
84
+
85
+ response = model.generate_content(prompt)
86
+ response_text = response.text.strip()
87
+ print("Response Text:", response_text)
88
+ modified_response_text = strip_code_markers(response_text)
89
+ print("Response Text Modified to:", modified_response_text)
90
+ # Extract and parse the response to get the list of MCQs
91
+ mcqs = ast.literal_eval(modified_response_text) # literal_eval only accepts Python literals, safer than eval for parsing model output
92
+ print(mcqs)
93
+ if not mcqs:
94
+ raise ValueError("No questions generated")
95
+ return mcqs
96
+ except Exception as e:
97
+ print(f"Error generating MCQs: {e}")
98
+ return None
99
+
100
+ # New function to save quiz to database
101
+ def save_quiz(course_id, session_id, title, questions, user_id):
102
+ """Save quiz to database"""
103
+ try:
104
+ quiz_data = {
105
+ "user_id": user_id,
106
+ "course_id": course_id,
107
+ "session_id": session_id,
108
+ "title": title,
109
+ "questions": questions,
110
+ "created_at": datetime.utcnow(),
111
+ "status": "active",
112
+ "submissions": []
113
+ }
114
+ result = quizzes_collection.insert_one(quiz_data)
115
+ return result.inserted_id
116
+ except Exception as e:
117
+ print(f"Error saving quiz: {e}")
118
+ return None
119
+
120
+
121
+ def get_student_quiz_score(quiz_id, student_id):
122
+ """Get student's score for a specific quiz"""
123
+ quiz = quizzes_collection.find_one(
124
+ {
125
+ "_id": quiz_id,
126
+ "submissions.student_id": student_id
127
+ },
128
+ {"submissions.$": 1}
129
+ )
130
+ if quiz and quiz.get('submissions'):
131
+ return quiz['submissions'][0].get('score')
132
+ return None
133
+
134
+ # def submit_quiz_answers(quiz_id, student_id, student_answers):
135
+ # """Submit and score student's quiz answers"""
136
+ # quiz = quizzes_collection.find_one({"_id": quiz_id})
137
+ # if not quiz:
138
+ # return None
139
+
140
+ # # Calculate score
141
+ # correct_answers = 0
142
+ # total_questions = len(quiz['questions'])
143
+
144
+ # for q_idx, question in enumerate(quiz['questions']):
145
+ # if student_answers.get(str(q_idx)) == question['correct_option']:
146
+ # correct_answers += 1
147
+
148
+ # score = (correct_answers / total_questions) * 100
149
+
150
+ # # Store submission
151
+ # submission_data = {
152
+ # "student_id": student_id,
153
+ # "answers": student_answers,
154
+ # "score": score,
155
+ # "submitted_at": datetime.utcnow()
156
+ # }
157
+
158
+ # # Update quiz with submission
159
+ # quizzes_collection.update_one(
160
+ # {"_id": quiz_id},
161
+ # {
162
+ # "$push": {"submissions": submission_data}
163
+ # }
164
+ # )
165
+
166
+ # return score
167
+ def submit_quiz_answers(quiz_id, student_id, student_answers):
168
+ """Submit and score student's quiz answers"""
169
+ try:
170
+ quiz = quizzes_collection.find_one({"_id": quiz_id})
171
+ if not quiz:
172
+ return None
173
+
174
+ # Calculate score
175
+ correct_answers = 0
176
+ total_questions = len(quiz['questions'])
177
+
178
+ for q_idx, question in enumerate(quiz['questions']):
179
+ student_answer = student_answers.get(str(q_idx))
180
+ if student_answer: # Only check if answer was provided
181
+ # Extract the option letter (A, B, C, D) from the full answer string
182
+ answer_letter = student_answer.split(')')[0].strip()
183
+ if answer_letter == question['correct_option']:
184
+ correct_answers += 1
185
+
186
+ score = (correct_answers / total_questions) * 100
187
+
188
+ # Store submission
189
+ submission_data = {
190
+ "student_id": student_id,
191
+ "answers": student_answers,
192
+ "score": score,
193
+ "submitted_at": datetime.utcnow()
194
+ }
195
+
196
+ # Update quiz with submission
197
+ result = quizzes_collection.update_one(
198
+ {"_id": quiz_id},
199
+ {"$push": {"submissions": submission_data}}
200
+ )
201
+
202
+ return score if result.modified_count > 0 else None
203
+
204
+ except Exception as e:
205
+ print(f"Error submitting quiz: {e}")
206
+ return None
goals2.py ADDED
@@ -0,0 +1,658 @@
1
+ import streamlit as st
2
+ from typing import List, Dict
3
+ import httpx
4
+ from pathlib import Path
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import json
8
+ import numpy as np
9
+ from pymongo import MongoClient
10
+ from openai import OpenAI
11
+ from datetime import datetime
12
+ import asyncio
13
+ import pandas as pd
14
+
15
+ # Load environment variables
16
+ load_dotenv()
17
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
18
+ MONGODB_URI = os.getenv("MONGO_URI")
19
+ OPENAI_API_KEY = os.getenv("OPENAI_KEY")
20
+
21
+ # Initialize MongoDB client
22
+ client = MongoClient(MONGODB_URI)
23
+ db = client["document_analysis"]
24
+ vectors_collection = db["document_vectors"]
25
+
26
+ # Initialize OpenAI client
27
+ openai_client = OpenAI(api_key=OPENAI_API_KEY)
28
+
29
+
30
+ class GoalAnalyzer:
31
+ def __init__(self):
32
+ self.api_key = PERPLEXITY_API_KEY
33
+ self.base_url = "https://api.perplexity.ai/chat/completions"
34
+
35
+ def clean_json_string(self, content: str) -> str:
36
+ """Clean and extract valid JSON from string"""
37
+ # Remove markdown formatting
38
+ if "```json" in content:
39
+ content = content.split("```json")[1].split("```")[0]
40
+ elif "```" in content:
41
+ content = content.split("```")[1]
42
+
43
+ # Find the JSON object boundaries
44
+ start_idx = content.find("{")
45
+ end_idx = content.rfind("}") + 1
46
+
47
+ if start_idx != -1 and end_idx > 0:
48
+ content = content[start_idx:end_idx]
49
+
50
+ # Clean up common issues
51
+ content = content.strip()
52
+ content = content.replace("\n", "")
53
+ content = content.replace("'", '"')
54
+
55
+ return content
56
+
57
+ async def get_perplexity_analysis(self, text: str, goal: str) -> Dict:
58
+ """Get analysis from Perplexity API"""
59
+ headers = {
60
+ "Authorization": f"Bearer {self.api_key}",
61
+ "Content-Type": "application/json",
62
+ }
63
+
64
+ prompt = f"""
65
+ Analyze the following text in context of the goal: {goal}
66
+
67
+ Text: {text}
68
+
69
+ Provide analysis in the following JSON format:
70
+ {{
71
+ "themes": ["theme1", "theme2"],
72
+ "subthemes": {{"theme1": ["subtheme1", "subtheme2"], "theme2": ["subtheme3"]}},
73
+ "keywords": ["keyword1", "keyword2"],
74
+ "relevance_score": 0-100
75
+ }}
76
+ """
77
+
78
+ try:
79
+ async with httpx.AsyncClient() as client:
80
+ payload = {
81
+ "model": "llama-3.1-sonar-small-128k-chat", # Updated to supported model
82
+ "messages": [
83
+ {
84
+ "role": "system",
85
+ "content": "You are an AI assistant that analyzes documents and provides structured analysis.",
86
+ },
87
+ {"role": "user", "content": prompt},
88
+ ],
89
+ "max_tokens": 1024,
90
+ }
91
+
92
+ # Debug info using expander
93
+ with st.expander("Debug Info", expanded=False):
94
+ st.write("Request payload:", payload)
95
+
96
+ response = await client.post(
97
+ self.base_url, headers=headers, json=payload, timeout=30.0
98
+ )
99
+
100
+ # Debug response info
101
+ with st.expander("Response Info", expanded=False):
102
+ st.write("Response status:", response.status_code)
103
+ st.write("Response headers:", dict(response.headers))
104
+ st.write("Response content:", response.text)
105
+
106
+ if response.status_code != 200:
107
+ error_detail = (
108
+ response.json() if response.content else "No error details"
109
+ )
110
+ raise Exception(
111
+ f"API returned status code {response.status_code}. Details: {error_detail}"
112
+ )
113
+
114
+ result = response.json()
115
+ content = (
116
+ result.get("choices", [{}])[0].get("message", {}).get("content", "")
117
+ )
118
+
119
+ # Clean and parse JSON
120
+ cleaned_content = self.clean_json_string(content)
121
+
122
+ try:
123
+ analysis = json.loads(cleaned_content)
124
+
125
+ # Validate required fields
126
+ required_fields = [
127
+ "themes",
128
+ "subthemes",
129
+ "keywords",
130
+ "relevance_score",
131
+ ]
132
+ for field in required_fields:
133
+ if field not in analysis:
134
+ analysis[field] = [] if field != "relevance_score" else 0
135
+
136
+ return analysis
137
+
138
+ except json.JSONDecodeError as e:
139
+ st.error(f"JSON parsing error: {str(e)}")
140
+ st.error(f"Failed content: {cleaned_content}")
141
+ return {
142
+ "themes": ["Error parsing themes"],
143
+ "subthemes": {"Error": ["Failed to parse subthemes"]},
144
+ "keywords": ["parsing-error"],
145
+ "relevance_score": 0,
146
+ }
147
+
148
+ except Exception as e:
149
+ st.error(f"API Error: {str(e)}")
150
+ return None
151
+
152
+ def extract_text_from_file(self, file) -> str:
153
+ """Extract text content from uploaded file"""
154
+ try:
155
+ text = ""
156
+ file_type = file.type
157
+
158
+ if file_type == "text/plain":
159
+ text = file.getvalue().decode("utf-8")
160
+ elif file_type == "application/pdf":
161
+ import PyPDF2
162
+
163
+ pdf_reader = PyPDF2.PdfReader(file)
164
+ for page in pdf_reader.pages:
165
+ text += page.extract_text()
166
+ elif (
167
+ file_type
168
+ == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
169
+ ):
170
+ import docx
171
+
172
+ doc = docx.Document(file)
173
+ text = " ".join([paragraph.text for paragraph in doc.paragraphs])
174
+
175
+ return text
176
+ except Exception as e:
177
+ st.error(f"Error extracting text: {str(e)}")
178
+ return ""
179
+
180
+
181
+ class DocumentVectorizer:
182
+ def __init__(self):
183
+ self.model = "text-embedding-ada-002"
184
+ self.client = MongoClient(MONGODB_URI)
185
+ self.db = self.client["document_analysis"]
186
+ self.vectors_collection = self.db["document_vectors"]
187
+
188
+ # Create vector search index if it doesn't exist
189
+ try:
190
+ self.vectors_collection.create_index(
191
+ [("vector", "2dsphere")], # Changed to 2dsphere for vector indexing
192
+ {
193
+ "vectorSearchConfig": {
194
+ "dimensions": 1536, # OpenAI embedding dimensions
195
+ "similarity": "cosine",
196
+ }
197
+ },
198
+ )
199
+ except Exception as e:
200
+ st.warning(f"Vector index may already exist")
201
+
202
+ def get_embedding(self, text: str) -> list:
203
+ """Get embedding vector for text using OpenAI"""
204
+ try:
205
+ response = openai_client.embeddings.create(model=self.model, input=text)
206
+ return response.data[0].embedding
207
+ except Exception as e:
208
+ st.error(f"Error getting embedding: {str(e)}")
209
+ return None
210
+
211
+ # Add this method to DocumentVectorizer class
212
+ def vector_exists(self, doc_name: str) -> bool:
213
+ """Check if vector exists for document"""
214
+ return self.vectors_collection.count_documents({"name": doc_name}) > 0
215
+
216
+ # Update store_vector method in DocumentVectorizer class
217
+ def store_vector(self, doc_name: str, vector: list, text: str, goal: str = None):
218
+ """Store document/goal vector in MongoDB using upsert"""
219
+ try:
220
+ vector_doc = {
221
+ "name": doc_name,
222
+ "vector": vector,
223
+ "text": text,
224
+ "type": "document" if goal is None else "goal",
225
+ "goal": goal,
226
+ "updated_at": datetime.utcnow(),
227
+ }
228
+
229
+ # Use update_one with upsert
230
+ self.vectors_collection.update_one(
231
+ {"name": doc_name},
232
+ {"$set": vector_doc, "$setOnInsert": {"created_at": datetime.utcnow()}},
233
+ upsert=True,
234
+ )
235
+
236
+ except Exception as e:
237
+ st.error(f"Error storing vector: {str(e)}")
238
+
239
+ # Update vector_search method in DocumentVectorizer class
240
+ def vector_search(self, query_vector: List[float], limit: int = 5) -> List[Dict]:
241
+ """Search for similar documents using vector similarity"""
242
+ try:
243
+ # Get all documents
244
+ documents = list(self.vectors_collection.find({"type": "document"}))
245
+
246
+ # Calculate similarities
247
+ similarities = []
248
+ for doc in documents:
249
+ similarity = self.calculate_similarity(query_vector, doc["vector"])
250
+ similarities.append(
251
+ {
252
+ "name": doc["name"],
253
+ "text": doc["text"],
254
+ "similarity": similarity, # Keep as float
255
+ "similarity_display": f"{similarity*100:.1f}%", # Add display version
256
+ }
257
+ )
258
+
259
+ # Sort by similarity and get top k
260
+ sorted_docs = sorted(
261
+ similarities,
262
+ key=lambda x: x["similarity"], # Sort by float value
263
+ reverse=True,
264
+ )[:limit]
265
+
266
+ return sorted_docs
267
+
268
+ except Exception as e:
269
+ st.error(f"Vector search error: {str(e)}")
270
+ return []
271
+
272
+ def find_similar_documents(self, text: str, limit: int = 5) -> List[Dict]:
273
+ """Find similar documents for given text"""
274
+ vector = self.get_embedding(text)
275
+ if vector:
276
+ return self.vector_search(vector, limit)
277
+ return []
278
+
279
+ def calculate_similarity(self, vector1: list, vector2: list) -> float:
280
+ """Calculate cosine similarity between two vectors"""
281
+ return np.dot(vector1, vector2) / (
282
+ np.linalg.norm(vector1) * np.linalg.norm(vector2)
283
+ )
284
+
285
+
286
+ def display_analysis_results(analysis: Dict):
287
+ """Display analysis results in Streamlit UI"""
288
+ if not analysis:
289
+ return
290
+
291
+ # Display Themes
292
+ st.subheader("Themes")
293
+ for theme in analysis.get("themes", []):
294
+ with st.expander(f"🎯 {theme}"):
295
+ # Display subthemes for this theme
296
+ subthemes = analysis.get("subthemes", {}).get(theme, [])
297
+ if subthemes:
298
+ st.write("**Subthemes:**")
299
+ for subtheme in subthemes:
300
+ st.write(f"- {subtheme}")
301
+
302
+ # Display Keywords
303
+ st.subheader("Keywords")
304
+ keywords = analysis.get("keywords", [])
305
+ st.write(" | ".join([f"🔑 {keyword}" for keyword in keywords]))
306
+
307
+ # Display Relevance Score
308
+ score = analysis.get("relevance_score", 0)
309
+ st.metric("Relevance Score", f"{score}%")
310
+
311
+
312
+ def display_analyst_dashboard():
313
+ st.title("Multi-Goal Document Analysis")
314
+
315
+ with st.sidebar:
316
+ st.markdown("### Input Section")
317
+ tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
318
+ # tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
319
+
320
+ with tab1:
321
+ # Multiple goals input
322
+ num_goals = st.number_input("Number of goals:", min_value=1, value=1)
323
+ goals = []
324
+ for i in range(num_goals):
325
+ goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
326
+ if goal:
327
+ goals.append(goal)
328
+
329
+ uploaded_files = st.file_uploader(
330
+ "Upload documents",
331
+ accept_multiple_files=True,
332
+ type=["txt", "pdf", "docx"],
333
+ )
334
+ analyze_button = (
335
+ st.button("Analyze Documents") if goals and uploaded_files else None
336
+ )
337
+
338
+ with tab2:
339
+ # Keep existing similarity search tab
340
+ search_text = st.text_area("Enter text to find similar documents:")
341
+ search_limit = st.slider("Number of results", 1, 10, 5)
342
+ search_button = st.button("Search Similar") if search_text else None
343
+
344
+ if st.button("Logout", use_container_width=True):
345
+ for key in st.session_state.keys():
346
+ del st.session_state[key]
347
+ st.rerun()
348
+
349
+ if analyze_button:
350
+ analyzer = GoalAnalyzer()
351
+ vectorizer = DocumentVectorizer()
352
+
353
+ # Store vectors
354
+ doc_vectors = {}
355
+ goal_vectors = {}
356
+
357
+ # Process goals first
358
+ with st.spinner("Processing goals..."):
359
+ for i, goal in enumerate(goals):
360
+ vector = vectorizer.get_embedding(goal)
361
+ if vector:
362
+ goal_vectors[f"Goal {i+1}"] = vector
363
+ vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)
364
+
365
+ # Process documents
366
+ with st.spinner("Processing documents..."):
367
+ for file in uploaded_files:
368
+ st.markdown(f"### Analysis for {file.name}")
369
+
370
+ if vectorizer.vector_exists(file.name):
371
+ st.info(f"Vector already exists for {file.name}")
372
+ existing_doc = vectorizer.vectors_collection.find_one(
373
+ {"name": file.name}
374
+ )
375
+ doc_vectors[file.name] = existing_doc["vector"]
376
+ else:
377
+ text = analyzer.extract_text_from_file(file)
378
+ if not text:
379
+ st.warning(f"Could not extract text from {file.name}")
380
+ continue
381
+
382
+ vector = vectorizer.get_embedding(text)
383
+ if vector:
384
+ doc_vectors[file.name] = vector
385
+ vectorizer.store_vector(file.name, vector, text)
386
+
387
+ # Display goal similarities
388
+ st.subheader("Goal Relevance Scores")
389
+ col1, col2 = st.columns([1, 2])
390
+
391
+ with col1:
392
+ for goal_name, goal_vector in goal_vectors.items():
393
+ similarity = (
394
+ vectorizer.calculate_similarity(
395
+ doc_vectors[file.name], goal_vector
396
+ )
397
+ * 100
398
+ )
399
+ st.metric(f"{goal_name}", f"{similarity:.1f}%")
400
+
401
+ with col2:
402
+ # Get analysis for all goals combined
403
+ analysis = asyncio.run(
404
+ analyzer.get_perplexity_analysis(text, " | ".join(goals))
405
+ )
406
+ display_analysis_results(analysis)
407
+
408
+ st.divider()
409
+
410
+ # Document similarity matrix
411
+ if len(doc_vectors) > 1:
412
+ st.markdown("### Document Similarity Matrix")
413
+ files = list(doc_vectors.keys())
414
+ similarity_matrix = []
415
+
416
+ for file1 in files:
417
+ row = []
418
+ for file2 in files:
419
+ similarity = vectorizer.calculate_similarity(
420
+ doc_vectors[file1], doc_vectors[file2]
421
+ )
422
+ row.append(similarity)
423
+ similarity_matrix.append(row)
424
+
425
+ df = pd.DataFrame(similarity_matrix, columns=files, index=files)
426
+ st.dataframe(df.style.background_gradient(cmap="RdYlGn"))
427
+
428
+ # Add goal-document similarity matrix
429
+ st.markdown("### Goal-Document Similarity Matrix")
430
+ goal_doc_matrix = []
431
+ goal_names = list(goal_vectors.keys())
432
+
433
+ for file in files:
434
+ row = []
435
+ for goal in goal_names:
436
+ similarity = vectorizer.calculate_similarity(
437
+ doc_vectors[file], goal_vectors[goal]
438
+ )
439
+ row.append(similarity)
440
+ goal_doc_matrix.append(row)
441
+
442
+ df_goals = pd.DataFrame(
443
+ goal_doc_matrix, columns=goal_names, index=files
444
+ )
445
+ st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))
446
+
447
+ # Keep existing similarity search functionality
448
+ elif search_button:
449
+ vectorizer = DocumentVectorizer()
450
+ with st.spinner("Searching similar documents..."):
451
+ query_vector = vectorizer.get_embedding(search_text)
452
+ if query_vector:
453
+ similar_docs = vectorizer.vector_search(query_vector, search_limit)
454
+
455
+ if similar_docs:
456
+ st.markdown("### Similar Documents Found")
457
+
458
+ # Create DataFrame with numeric similarities
459
+ df = pd.DataFrame(similar_docs)
460
+
461
+ # Apply gradient to numeric column
462
+ styled_df = df[["name", "similarity"]].style.background_gradient(
463
+ cmap="RdYlGn", subset=["similarity"]
464
+ )
465
+
466
+ # Format display after styling
467
+ styled_df = styled_df.format({"similarity": "{:.1%}"})
468
+
469
+ st.dataframe(styled_df)
470
+
471
+ # Show document contents
472
+ for doc in similar_docs:
473
+ with st.expander(
474
+ f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
475
+ ):
476
+ st.text(
477
+ doc["text"][:20] + "..."
478
+ if len(doc["text"]) > 20
479
+ else doc["text"]
480
+ )
481
+ else:
482
+ st.info("No similar documents found")
483
+ else:
484
+ st.error("Could not process search query")
485
+
486
+
487
+ def main():
488
+ st.title("Multi-Goal Document Analysis")
489
+
490
+ with st.sidebar:
491
+ st.markdown("### Input Section")
492
+ tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
493
+ # tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
494
+
495
+ with tab1:
496
+ # Multiple goals input
497
+ num_goals = st.number_input("Number of goals:", min_value=1, value=1)
498
+ goals = []
499
+ for i in range(num_goals):
500
+ goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
501
+ if goal:
502
+ goals.append(goal)
503
+
504
+ uploaded_files = st.file_uploader(
505
+ "Upload documents",
506
+ accept_multiple_files=True,
507
+ type=["txt", "pdf", "docx"],
508
+ )
509
+ analyze_button = (
510
+ st.button("Analyze Documents") if goals and uploaded_files else None
511
+ )
512
+
513
+ with tab2:
514
+ # Keep existing similarity search tab
515
+ search_text = st.text_area("Enter text to find similar documents:")
516
+ search_limit = st.slider("Number of results", 1, 10, 5)
517
+ search_button = st.button("Search Similar") if search_text else None
518
+
519
+ if analyze_button:
520
+ analyzer = GoalAnalyzer()
521
+ vectorizer = DocumentVectorizer()
522
+
523
+ # Store vectors
524
+ doc_vectors = {}
525
+ goal_vectors = {}
526
+
527
+ # Process goals first
528
+ with st.spinner("Processing goals..."):
529
+ for i, goal in enumerate(goals):
530
+ vector = vectorizer.get_embedding(goal)
531
+ if vector:
532
+ goal_vectors[f"Goal {i+1}"] = vector
533
+ vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)
534
+
535
+ # Process documents
536
+ with st.spinner("Processing documents..."):
537
+ for file in uploaded_files:
538
+ st.markdown(f"### Analysis for {file.name}")
539
+
540
+ if vectorizer.vector_exists(file.name):
541
+ st.info(f"Vector already exists for {file.name}")
542
+ existing_doc = vectorizer.vectors_collection.find_one(
543
+ {"name": file.name}
544
+ )
545
+ doc_vectors[file.name] = existing_doc["vector"]
546
+ else:
547
+ text = analyzer.extract_text_from_file(file)
548
+ if not text:
549
+ st.warning(f"Could not extract text from {file.name}")
550
+ continue
551
+
552
+ vector = vectorizer.get_embedding(text)
553
+ if vector:
554
+ doc_vectors[file.name] = vector
555
+ vectorizer.store_vector(file.name, vector, text)
556
+
557
+ # Display goal similarities
558
+ st.subheader("Goal Relevance Scores")
559
+ col1, col2 = st.columns([1, 2])
560
+
561
+ with col1:
562
+ for goal_name, goal_vector in goal_vectors.items():
563
+ similarity = (
564
+ vectorizer.calculate_similarity(
565
+ doc_vectors[file.name], goal_vector
566
+ )
567
+ * 100
568
+ )
569
+ st.metric(f"{goal_name}", f"{similarity:.1f}%")
570
+
571
+ with col2:
572
+ # Get analysis for all goals combined
573
+ analysis = asyncio.run(
574
+ analyzer.get_perplexity_analysis(text, " | ".join(goals))
575
+ )
576
+ display_analysis_results(analysis)
577
+
578
+ st.divider()
579
+
580
+ # Document similarity matrix
581
+ if len(doc_vectors) > 1:
582
+ st.markdown("### Document Similarity Matrix")
583
+ files = list(doc_vectors.keys())
584
+ similarity_matrix = []
585
+
586
+ for file1 in files:
587
+ row = []
588
+ for file2 in files:
589
+ similarity = vectorizer.calculate_similarity(
590
+ doc_vectors[file1], doc_vectors[file2]
591
+ )
592
+ row.append(similarity)
593
+ similarity_matrix.append(row)
594
+
595
+ df = pd.DataFrame(similarity_matrix, columns=files, index=files)
596
+ st.dataframe(df.style.background_gradient(cmap="RdYlGn"))
597
+
598
+ # Add goal-document similarity matrix
599
+ st.markdown("### Goal-Document Similarity Matrix")
600
+ goal_doc_matrix = []
601
+ goal_names = list(goal_vectors.keys())
602
+
603
+ for file in files:
604
+ row = []
605
+ for goal in goal_names:
606
+ similarity = vectorizer.calculate_similarity(
607
+ doc_vectors[file], goal_vectors[goal]
608
+ )
609
+ row.append(similarity)
610
+ goal_doc_matrix.append(row)
611
+
612
+ df_goals = pd.DataFrame(
613
+ goal_doc_matrix, columns=goal_names, index=files
614
+ )
615
+ st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))
616
+
617
+ # Keep existing similarity search functionality
618
+ elif search_button:
619
+ vectorizer = DocumentVectorizer()
620
+ with st.spinner("Searching similar documents..."):
621
+ query_vector = vectorizer.get_embedding(search_text)
622
+ if query_vector:
623
+ similar_docs = vectorizer.vector_search(query_vector, search_limit)
624
+
625
+ if similar_docs:
626
+ st.markdown("### Similar Documents Found")
627
+
628
+ # Create DataFrame with numeric similarities
629
+ df = pd.DataFrame(similar_docs)
630
+
631
+ # Apply gradient to numeric column
632
+ styled_df = df[["name", "similarity"]].style.background_gradient(
633
+ cmap="RdYlGn", subset=["similarity"]
634
+ )
635
+
636
+ # Format display after styling
637
+ styled_df = styled_df.format({"similarity": "{:.1%}"})
638
+
639
+ st.dataframe(styled_df)
640
+
641
+ # Show document contents
642
+ for doc in similar_docs:
643
+ with st.expander(
644
+ f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
645
+ ):
646
+ st.text(
647
+ doc["text"][:20] + "..."
648
+ if len(doc["text"]) > 20
649
+ else doc["text"]
650
+ )
651
+ else:
652
+ st.info("No similar documents found")
653
+ else:
654
+ st.error("Could not process search query")
655
+
656
+
657
+ if __name__ == "__main__":
658
+ main()
infranew.py ADDED
@@ -0,0 +1,231 @@
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import networkx as nx
4
+ from bokeh.models import HoverTool
5
+ from bokeh.plotting import figure, from_networkx
6
+ import requests
7
+ import json
8
+ import google.generativeai as genai
9
+
10
+ PERPLEXITY_API_KEY = "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"
11
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
12
+
13
+
14
+ def extract_edges(keywords):
15
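+ # Treat the comma-separated keywords as a clique: every pair of keywords becomes an edge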
+ keywords = [kw.strip() for kw in keywords.split(",")]
16
+ edges = [
17
+ (keywords[i], keywords[j])
18
+ for i in range(len(keywords))
19
+ for j in range(i + 1, len(keywords))
20
+ ]
21
+ return edges
22
+
23
+
24
+ def create_knowledge_graph(data):
25
+ G = nx.Graph()
26
+
27
+ for _, row in data.iterrows():
28
+ words = []
29
+ for col in data.columns:
30
+ if pd.notnull(row[col]):
31
+ # Convert to string and handle numeric values
32
+ cell_value = str(row[col]).strip()
33
+ if cell_value:
34
+ words.extend(cell_value.split())
35
+
36
+ if words:
37
+ edges = extract_edges(",".join(words))
38
+ G.add_edges_from(edges)
39
+
40
+ for word in words:
41
+ word = word.strip()
42
+ if word not in G:
43
+ G.add_node(word, title=word, value=len(word))
44
+
45
+ return G
46
+
47
+
48
+ def render_graph_bokeh(G):
49
+ plot = figure(
50
+ title="Interactive Knowledge Graph",
51
+ x_range=(-1.5, 1.5),
52
+ y_range=(-1.5, 1.5),
53
+ tools="pan,wheel_zoom,box_zoom,reset,tap",
54
+ active_scroll="wheel_zoom",
55
+ )
56
+ plot.add_tools(HoverTool(tooltips="@index"))
57
+
58
+ graph_renderer = from_networkx(G, nx.spring_layout, scale=1, center=(0, 0))
59
+
60
+ graph_renderer.node_renderer.glyph.size = 10
61
+ graph_renderer.node_renderer.glyph.fill_color = "blue"
62
+ graph_renderer.node_renderer.glyph.line_color = "black"
63
+
64
+ graph_renderer.edge_renderer.glyph.line_width = 1
65
+ graph_renderer.edge_renderer.glyph.line_color = "gray"
66
+
67
+ plot.renderers.append(graph_renderer)
68
+
69
+ return plot
70
+
71
+
72
+ import re
73
+
74
+
75
+ def search_papers(topic: str, num_papers: int) -> list:
76
+ headers = {
77
+ "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
78
+ "Content-Type": "application/json",
79
+ }
80
+
81
+ prompt = f"""Find {num_papers} recent research papers about {topic}.
82
+ Return ONLY a valid JSON array with the following structure for each paper:
83
+ [
84
+ {{
85
+ "Title": "paper title",
86
+ "Abstract": "abstract text",
87
+ "Keywords": "key terms"
88
+ }}
89
+ ]"""
90
+
91
+ payload = {
92
+ "model": "llama-3.1-sonar-small-128k-chat",
93
+ "messages": [
94
+ {
95
+ "role": "system",
96
+ "content": "You are a research paper analyzer that returns valid JSON arrays.",
97
+ },
98
+ {"role": "user", "content": prompt},
99
+ ],
100
+ "temperature": 0.1,
101
+ }
102
+
103
+ try:
104
+ response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
105
+ response.raise_for_status()
106
+ content = response.json()["choices"][0]["message"]["content"]
107
+
108
+ # Clean response to ensure valid JSON
109
+ content = content.strip()
110
+ if not content.startswith("["):
111
+ content = content[content.find("[") :]
112
+ if not content.endswith("]"):
113
+ content = content[: content.rfind("]") + 1]
114
+
115
+ # Remove any trailing commas before closing brackets
116
+ content = re.sub(r",\s*]", "]", content)
117
+ content = re.sub(r",\s*}", "}", content)
118
+
119
+ papers = json.loads(content)
120
+ if not isinstance(papers, list):
121
+ raise ValueError("Response is not a JSON array")
122
+ return papers
123
+ except requests.exceptions.RequestException as e:
124
+ st.error(f"API Request Error: {str(e)}")
125
+ return []
126
+ except json.JSONDecodeError as e:
127
+ st.error(f"Invalid JSON response: {str(e)}")
128
+ st.error(f"Response content: {response.text}")
129
+ return []
130
+ except ValueError as e:
131
+ st.error(f"Error: {str(e)}")
132
+ return []
133
+
134
+
135
+ import os
136
+
137
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
138
+ GEMINI_API_URL = "https://api.openai.com/v1/engines/davinci-codex/completions"
139
+
140
+
141
+ def call_gemini_api(prompt: str) -> str:
142
+ headers = {
143
+ "Authorization": f"Bearer {GEMINI_API_KEY}",
144
+ "Content-Type": "application/json",
145
+ }
146
+
147
+ payload = {
148
+ "prompt": prompt,
149
+ "max_tokens": 150,
150
+ "temperature": 0.7,
151
+ }
152
+
153
+ try:
154
+ model = genai.GenerativeModel("gemini-pro")
155
+ response = model.generate_content(prompt)
156
+ return response.text
157
+ except Exception as e:
158
+ st.error(f"Gemini API Error: {str(e)}")
159
+ return ""
160
+
161
+
162
+ def generate_gaps_paragraph(gaps):
163
+ prompt = f"Generate a brief paragraph about the gaps in the research based on the following gaps: {', '.join(gaps)}"
164
+ return call_gemini_api(prompt)
165
+
166
+
167
+ def generate_insights(G, topic):
168
+ papers = search_papers(topic, 5)
169
+ if papers:
170
+ st.write("### Research Insights from Perplexity API")
171
+ for paper in papers:
172
+ st.write(f"**Title:** {paper['Title']}")
173
+ st.write(f"**Abstract:** {paper['Abstract']}")
174
+ st.write(f"**Keywords:** {paper['Keywords']}")
175
+ st.write("---")
176
+
177
+ nodes = list(G.nodes(data=True))
178
+ insights = {}
179
+ insights["Strong Points"] = [
180
+ n for n, d in nodes if G.degree(n) > len(G.nodes) * 0.1
181
+ ]
182
+ insights["Weak Points"] = [n for n, d in nodes if G.degree(n) < len(G.nodes) * 0.05]
183
+ insights["Gaps"] = [n for n, d in nodes if len(list(nx.neighbors(G, n))) == 0]
184
+
185
+ st.write("### Graph-Based Insights")
186
+ st.write("**Strong Points:**", insights["Strong Points"])
187
+ st.write("**Weak Points:**", insights["Weak Points"])
188
+ st.write("**Gaps:**", insights["Gaps"])
189
+
190
+ if insights["Gaps"]:
191
+ with st.spinner("Generating insights about gaps..."):
192
+ gaps_paragraph = generate_gaps_paragraph(insights["Gaps"])
193
+ if gaps_paragraph:
194
+ st.write("### Gaps in Research")
195
+ st.write(gaps_paragraph)
196
+
197
+
198
+ def main():
199
+ st.title("Advanced Interactive Knowledge Graph")
200
+ st.write(
201
+ "Upload a CSV file to generate a fully interactive and insightful knowledge graph."
202
+ )
203
+
204
+ uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
205
+
206
+ if uploaded_file is not None:
207
+ try:
208
+ data = pd.read_csv(uploaded_file)
209
+ st.write("Preview of the uploaded data:")
210
+ st.dataframe(data.head())
211
+
212
+ G = create_knowledge_graph(data)
213
+
214
+ st.write("Generated Knowledge Graph:")
215
+ plot = render_graph_bokeh(G)
216
+ st.bokeh_chart(plot, use_container_width=True)
217
+
218
+ topic = st.text_input(
219
+ "Enter a topic for additional insights:", "knowledge graphs"
220
+ )
221
+ if topic:
222
+ generate_insights(G, topic)
223
+
224
+ except Exception as e:
225
+ st.error(f"An error occurred while processing the file: {e}")
226
+ else:
227
+ st.info("Please upload a CSV file to get started.")
228
+
229
+
230
+ if __name__ == "__main__":
231
+ main()
keywords_database_download.py ADDED
@@ -0,0 +1,104 @@
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from pymongo import MongoClient
4
+ from dotenv import load_dotenv
5
+ import os
6
+ import json
7
+ import re
8
+
9
+ # 1. Load environment variables
10
+ load_dotenv()
11
+ MONGODB_URI = os.getenv(
12
+ "MONGODB_URI",
13
+ "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
14
+ )
15
+ # 2. Create MongoDB connection
16
+ client = MongoClient(MONGODB_URI)
17
+ db = client["novascholar_db"]
18
+ collection = db["research_papers"]
19
+
20
+
21
+ def convert_mixed_columns(df: pd.DataFrame) -> pd.DataFrame:
22
+ """
23
+ Convert any columns that contain lists into comma-separated strings
24
+ in order to ensure consistent data types for CSV export.
25
+ """
26
+ for col in df.columns:
27
+ if any(isinstance(val, list) for val in df[col].dropna()):
28
+ df[col] = df[col].apply(
29
+ lambda x: (
30
+ ", ".join(map(str, x))
31
+ if isinstance(x, list)
32
+ else (str(x) if pd.notna(x) else "")
33
+ )
34
+ )
35
+ return df
36
+
37
+
38
+ def filter_and_export_collection_to_csv(keyword: str, doc_collection=None):
39
+ """
40
+ Find documents in the given collection with a matching keyword
41
+ in the 'Keywords' field, export them to CSV, and return the DataFrame
42
+ and CSV filename.
43
+ """
44
+ # Use the default 'research_papers' collection if none provided
45
+ if doc_collection is None:
46
+ doc_collection = collection
47
+
48
+ docs = list(doc_collection.find({"Keywords": {"$regex": keyword, "$options": "i"}}))
49
+ if docs:
50
+ df = pd.DataFrame(docs)
51
+ df = convert_mixed_columns(df)
52
+ csv_filename = "papers_filtered_export.csv"
53
+ df.to_csv(csv_filename, index=False)
54
+ return df, csv_filename
55
+ else:
56
+ # Return an empty DataFrame if no documents found
57
+ return pd.DataFrame(), None
58
+
59
+
60
+ def main():
61
+ # st.set_page_config(page_title="Filter and Export Papers", layout="wide")
62
+ st.title("Filter and Export Papers by Keyword")
63
+
64
+ # Let user select the paper type
65
+ paper_type = st.selectbox(
66
+ "Select type of research paper:",
67
+ [
68
+ "Review Based Paper",
69
+ "Opinion/Perspective Based Paper",
70
+ "Empirical Research Paper",
71
+ "Research Paper (Other)",
72
+ ],
73
+ )
74
+
75
+ # 5. Let user enter the keyword to filter
76
+ keyword_input = st.text_input(
77
+ "Enter the exact keyword to filter papers by 'Keywords' field:"
78
+ )
79
+
80
+ # When user clicks button, use the collection for the selected paper type
81
+ if st.button("Export Filtered Papers to CSV"):
82
+ with st.spinner("Exporting filtered documents..."):
83
+ try:
84
+ # Determine dynamic collection based on paper type
85
+ collection_name = paper_type.replace(" ", "_").lower()
86
+ doc_collection = db[collection_name]
87
+
88
+ df, csv_filename = filter_and_export_collection_to_csv(
89
+ keyword_input, doc_collection
90
+ )
91
+ if not df.empty and csv_filename:
92
+ st.success(
93
+ f"Successfully exported filtered papers to {csv_filename}!"
94
+ )
95
+ st.write("Preview of the filtered DataFrame:")
96
+ st.dataframe(df)
97
+ else:
98
+ st.warning("No matching documents found for that keyword.")
99
+ except Exception as e:
100
+ st.error(f"Error exporting filtered papers: {str(e)}")
101
+
102
+
103
+ if __name__ == "__main__":
104
+ main()
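
As a standalone illustration of the query that filter_and_export_collection_to_csv issues, the sketch below runs the same case-insensitive $regex match against a per-type collection (the URI, collection name, and keyword are placeholders):

    import pandas as pd
    from pymongo import MongoClient

    db = MongoClient("mongodb://localhost:27017")["novascholar_db"]   # placeholder URI
    papers = db["review_based_paper"]                                  # collection name derived from the paper type

    docs = list(papers.find({"Keywords": {"$regex": "machine learning", "$options": "i"}}))
    df = pd.DataFrame(docs)
    df.to_csv("papers_filtered_export.csv", index=False)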
live_polls.py ADDED
@@ -0,0 +1,115 @@
1
+ # live_poll_feature.py
2
+
3
+ import streamlit as st
4
+ import pandas as pd
5
+ from datetime import datetime
6
+ from poll_db_operations import PollDatabase
7
+
8
+ class LivePollFeature:
9
+ def __init__(self):
10
+ self.db = PollDatabase()
11
+
12
+ def display_faculty_interface(self, session_id):
13
+ """Display the faculty interface for managing polls"""
14
+ st.subheader("Live Polls Management")
15
+
16
+ # Create new poll
17
+ with st.expander("Create New Poll", expanded=False):
18
+ question = st.text_input("Poll Question")
19
+
20
+ num_options = st.number_input("Number of Options",
21
+ min_value=2,
22
+ max_value=6,
23
+ value=4)
24
+
25
+ options = []
26
+ for i in range(num_options):
27
+ option = st.text_input(f"Option {i+1}",
28
+ key=f"option_{i}")
29
+ if option:
30
+ options.append(option)
31
+
32
+ if st.button("Create Poll") and question and len(options) >= 2:
33
+ self.db.create_poll(
34
+ st.session_state.selected_course,
35
+ session_id,
36
+ question,
37
+ options,
38
+ st.session_state.user_id
39
+ )
40
+ st.success("Poll created successfully!")
41
+ st.rerun()
42
+
43
+ # Display active polls
44
+ active_polls = self.db.get_active_polls(session_id)
45
+ if active_polls:
46
+ st.subheader("Active Polls")
47
+ for poll in active_polls:
48
+ with st.expander(f"Poll: {poll['question']}", expanded=True):
49
+ # Display results
50
+ self._display_poll_results(poll)
51
+
52
+ if st.button("Close Poll",
53
+ key=f"close_{str(poll['_id'])}"):
54
+ self.db.close_poll(poll['_id'])
55
+ st.success("Poll closed successfully!")
56
+ st.rerun()
57
+
58
+ def display_student_interface(self, session_id):
59
+ """Display the student interface for participating in polls"""
60
+ st.subheader("Live Polls")
61
+
62
+ active_polls = self.db.get_active_polls(session_id)
63
+ if not active_polls:
64
+ st.info("No active polls at the moment.")
65
+ return
66
+
67
+ for poll in active_polls:
68
+ with st.expander(f"Poll: {poll['question']}", expanded=True):
69
+ selected_option = st.radio(
70
+ "Your response:",
71
+ options=poll['options'],
72
+ key=f"poll_{str(poll['_id'])}"
73
+ )
74
+
75
+ if st.button("Submit Response",
76
+ key=f"submit_{str(poll['_id'])}"):
77
+ success, message = self.db.submit_response(
78
+ poll['_id'],
79
+ st.session_state.user_id,
80
+ selected_option
81
+ )
82
+ if success:
83
+ st.success(message)
84
+ else:
85
+ st.warning(message)
86
+ st.rerun()
87
+
88
+ # self._display_poll_results(poll)
89
+
90
+ def _display_poll_results(self, poll):
91
+ """Helper method to display poll results"""
92
+ responses_df = pd.DataFrame(
93
+ list(poll['responses'].items()),
94
+ columns=['Option', 'Votes']
95
+ )
96
+
97
+ total_votes = responses_df['Votes'].sum()
98
+
99
+ # Calculate percentages
100
+ if total_votes > 0:
101
+ responses_df['Percentage'] = (
102
+ responses_df['Votes'] / total_votes * 100
103
+ ).round(1)
104
+ else:
105
+ responses_df['Percentage'] = 0
106
+
107
+ # Display metrics
108
+ st.metric("Total Responses", total_votes)
109
+
110
+ # Display charts
111
+ st.bar_chart(responses_df.set_index('Option')['Votes'])
112
+
113
+ # Display detailed statistics
114
+ if st.session_state.user_type == 'faculty':
115
+ st.dataframe(responses_df)
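
The percentage calculation in _display_poll_results can be checked in isolation; a small sketch with made-up vote counts:

    import pandas as pd

    responses = {"Option A": 12, "Option B": 7, "Option C": 1}
    responses_df = pd.DataFrame(list(responses.items()), columns=["Option", "Votes"])
    total_votes = responses_df["Votes"].sum()
    responses_df["Percentage"] = (
        (responses_df["Votes"] / total_votes * 100).round(1) if total_votes > 0 else 0
    )
    print(responses_df)   # e.g. Option A -> 60.0%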
loldude.py ADDED
@@ -0,0 +1,135 @@
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import plotly.express as px
7
+ import plotly.graph_objects as go
8
+ from collections import defaultdict
9
+
10
+ def load_and_preprocess_data(uploaded_file):
11
+ """Load and preprocess the CSV data."""
12
+ df = pd.read_csv(uploaded_file)
13
+ # Combine relevant text fields for similarity comparison
14
+ df['combined_text'] = df['Title'].fillna('') + ' ' + df['Abstract'].fillna('') + ' ' + df['Keywords'].fillna('')
15
+ return df
16
+
17
+ def calculate_similarity_matrix(df):
18
+ """Calculate cosine similarity matrix based on combined text."""
19
+ tfidf = TfidfVectorizer(stop_words='english')
20
+ tfidf_matrix = tfidf.fit_transform(df['combined_text'])
21
+ similarity_matrix = cosine_similarity(tfidf_matrix)
22
+ return similarity_matrix
23
+
24
+ def find_similar_papers(similarity_matrix, df, threshold=0.7):
25
+ """Find pairs of papers with similarity above threshold."""
26
+ similar_pairs = []
27
+ for i in range(len(similarity_matrix)):
28
+ for j in range(i + 1, len(similarity_matrix)):
29
+ similarity = similarity_matrix[i][j]
30
+ if similarity >= threshold:
31
+ similar_pairs.append({
32
+ 'Paper 1': df.iloc[i]['Title'],
33
+ 'Paper 2': df.iloc[j]['Title'],
34
+ 'Similarity': similarity
35
+ })
36
+ return pd.DataFrame(similar_pairs)
37
+
38
+ def find_outliers(similarity_matrix, df, threshold=0.3):
39
+ """Find papers with low average similarity to others."""
40
+ avg_similarities = np.mean(similarity_matrix, axis=1)
41
+ outliers = []
42
+ for i, avg_sim in enumerate(avg_similarities):
43
+ if avg_sim < threshold:
44
+ outliers.append({
45
+ 'Title': df.iloc[i]['Title'],
46
+ 'Average Similarity': avg_sim
47
+ })
48
+ return pd.DataFrame(outliers)
49
+
50
+ def create_similarity_heatmap(similarity_matrix, df):
51
+ """Create a heatmap of similarity matrix."""
52
+ fig = go.Figure(data=go.Heatmap(
53
+ z=similarity_matrix,
54
+ x=df['Title'],
55
+ y=df['Title'],
56
+ colorscale='Viridis'
57
+ ))
58
+ fig.update_layout(
59
+ title='Paper Similarity Heatmap',
60
+ xaxis_tickangle=-45,
61
+ height=800
62
+ )
63
+ return fig
64
+
65
+ def analyze_keywords(df):
66
+ """Analyze keyword frequency across papers."""
67
+ keyword_freq = defaultdict(int)
68
+ for keywords in df['Keywords']:
69
+ if isinstance(keywords, str):
70
+ for keyword in keywords.split(','):
71
+ keyword = keyword.strip()
72
+ keyword_freq[keyword] += 1
73
+
74
+ keyword_df = pd.DataFrame([
75
+ {'Keyword': k, 'Frequency': v}
76
+ for k, v in keyword_freq.items()
77
+ ]).sort_values('Frequency', ascending=False)
78
+
79
+ return keyword_df
80
+
81
+ def main():
82
+ st.title('Research Papers Similarity Analysis')
83
+
84
+ uploaded_file = st.file_uploader("Upload your research papers CSV file", type=['csv'])
85
+
86
+ if uploaded_file is not None:
87
+ df = load_and_preprocess_data(uploaded_file)
88
+ similarity_matrix = calculate_similarity_matrix(df)
89
+
90
+ st.header('Document Similarity Analysis')
91
+
92
+ # Similarity Heatmap
93
+ st.subheader('Similarity Heatmap')
94
+ heatmap = create_similarity_heatmap(similarity_matrix, df)
95
+ st.plotly_chart(heatmap, use_container_width=True)
96
+
97
+ # Similar Papers
98
+ st.subheader('Similar Papers')
99
+ similarity_threshold = st.slider('Similarity Threshold', 0.0, 1.0, 0.7)
100
+ similar_papers = find_similar_papers(similarity_matrix, df, similarity_threshold)
101
+ if not similar_papers.empty:
102
+ st.dataframe(similar_papers)
103
+ else:
104
+ st.write("No papers found above the similarity threshold.")
105
+
106
+ # Outliers
107
+ st.subheader('Outlier Papers')
108
+ outlier_threshold = st.slider('Outlier Threshold', 0.0, 1.0, 0.3)
109
+ outliers = find_outliers(similarity_matrix, df, outlier_threshold)
110
+ if not outliers.empty:
111
+ st.dataframe(outliers)
112
+ else:
113
+ st.write("No outliers found below the threshold.")
114
+
115
+ # Keyword Analysis
116
+ st.header('Keyword Analysis')
117
+ keyword_freq = analyze_keywords(df)
118
+ if not keyword_freq.empty:
119
+ fig = px.bar(keyword_freq, x='Keyword', y='Frequency',
120
+ title='Keyword Frequency Across Papers')
121
+ fig.update_xaxes(tickangle=45)
122
+ st.plotly_chart(fig, use_container_width=True)
123
+
124
+ # Basic Statistics
125
+ st.header('Basic Statistics')
126
+ col1, col2 = st.columns(2)
127
+ with col1:
128
+ st.metric("Total Papers", len(df))
129
+ st.metric("Average Similarity", f"{np.mean(similarity_matrix):.2f}")
130
+ with col2:
131
+ st.metric("Unique Keywords", len(keyword_freq))
132
+ st.metric("Max Similarity", f"{np.max(similarity_matrix[~np.eye(similarity_matrix.shape[0], dtype=bool)]):.2f}")
133
+
134
+ if __name__ == "__main__":
135
+ main()
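
The similarity pipeline above reduces to TF-IDF vectors plus cosine similarity; a minimal sketch on three made-up abstracts:

    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    texts = [
        "knowledge graphs for flipped classroom learning",
        "graph neural networks over knowledge graphs",
        "survey instrument design for empirical research",
    ]
    tfidf_matrix = TfidfVectorizer(stop_words="english").fit_transform(texts)
    similarity_matrix = cosine_similarity(tfidf_matrix)
    print(similarity_matrix.round(2))  # off-diagonal entries near 1 indicate near-duplicate papers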
modify_schema.py ADDED
@@ -0,0 +1,222 @@
1
+ from db import courses_collection2
2
+ from dotenv import load_dotenv
3
+ import os
4
+ from pymongo import MongoClient
5
+ from datetime import datetime
6
+
7
+
8
+
9
+ load_dotenv()
10
+ MONGO_URI = os.getenv("MONGO_URI")
11
+
12
+ client = MongoClient(MONGO_URI)
13
+ db = client["novascholar_db"]
14
+
15
+ # Define the updated course schema
16
+ updated_course_schema = {
17
+ "bsonType": "object",
18
+ "required": [
19
+ "course_id",
20
+ "title",
21
+ "description",
22
+ "faculty",
23
+ "faculty_id",
24
+ "duration",
25
+ "created_at",
26
+ ],
27
+ "properties": {
28
+ "course_id": {
29
+ "bsonType": "string",
30
+ "description": "Unique identifier for the course",
31
+ },
32
+ "title": {"bsonType": "string", "description": "Title of the course"},
33
+ "description": {
34
+ "bsonType": "string",
35
+ "description": "Description of the course",
36
+ },
37
+ "faculty": {"bsonType": "string", "description": "Name of the faculty"},
38
+ "duration": {"bsonType": "string", "description": "Duration of the course"},
39
+ "created_at": {
40
+ "bsonType": "date",
41
+ "description": "Date when the course was created",
42
+ },
43
+ "sessions": {
44
+ "bsonType": "array",
45
+ "description": "List of sessions associated with the course",
46
+ "items": {
47
+ "bsonType": "object",
48
+ "required": ["session_id", "title", "date"],
49
+ "properties": {
50
+ "session_id": {
51
+ "bsonType": "string",
52
+ "description": "Unique identifier for the session",
53
+ },
54
+ "title": {
55
+ "bsonType": "string",
56
+ "description": "Title of the session",
57
+ },
58
+ "date": {"bsonType": "date", "description": "Date of the session"},
59
+ "status": {
60
+ "bsonType": "string",
61
+ "description": "Status of the session (e.g., completed, upcoming)",
62
+ },
63
+ "created_at": {
64
+ "bsonType": "date",
65
+ "description": "Date when the session was created",
66
+ },
67
+ "pre_class": {
68
+ "bsonType": "object",
69
+ "description": "Pre-class segment data",
70
+ "properties": {
71
+ "resources": {
72
+ "bsonType": "array",
73
+ "description": "List of pre-class resources",
74
+ "items": {
75
+ "bsonType": "object",
76
+ "required": ["type", "title", "url"],
77
+ "properties": {
78
+ "type": {
79
+ "bsonType": "string",
80
+ "description": "Type of resource (e.g., pdf, video)",
81
+ },
82
+ "title": {
83
+ "bsonType": "string",
84
+ "description": "Title of the resource",
85
+ },
86
+ "url": {
87
+ "bsonType": "string",
88
+ "description": "URL of the resource",
89
+ },
90
+ "vector": {
91
+ "bsonType": "array",
92
+ "description": "Vector representation of the resource",
93
+ "items": {"bsonType": "double"},
94
+ },
95
+ },
96
+ },
97
+ },
98
+ "completion_required": {
99
+ "bsonType": "bool",
100
+ "description": "Indicates if completion of pre-class resources is required",
101
+ },
102
+ },
103
+ },
104
+ "in_class": {
105
+ "bsonType": "object",
106
+ "description": "In-class segment data",
107
+ "properties": {
108
+ "topics": {
109
+ "bsonType": "array",
110
+ "description": "List of topics covered in the session",
111
+ "items": {"bsonType": "string"},
112
+ },
113
+ "quiz": {
114
+ "bsonType": "object",
115
+ "description": "Quiz data",
116
+ "properties": {
117
+ "title": {
118
+ "bsonType": "string",
119
+ "description": "Title of the quiz",
120
+ },
121
+ "questions": {
122
+ "bsonType": "int",
123
+ "description": "Number of questions in the quiz",
124
+ },
125
+ "duration": {
126
+ "bsonType": "int",
127
+ "description": "Duration of the quiz in minutes",
128
+ },
129
+ },
130
+ },
131
+ "polls": {
132
+ "bsonType": "array",
133
+ "description": "List of polls conducted during the session",
134
+ "items": {
135
+ "bsonType": "object",
136
+ "required": ["question", "options"],
137
+ "properties": {
138
+ "question": {
139
+ "bsonType": "string",
140
+ "description": "Poll question",
141
+ },
142
+ "options": {
143
+ "bsonType": "array",
144
+ "description": "List of poll options",
145
+ "items": {"bsonType": "string"},
146
+ },
147
+ "responses": {
148
+ "bsonType": "object",
149
+ "description": "Responses to the poll",
150
+ "additionalProperties": {"bsonType": "int"},
151
+ },
152
+ },
153
+ },
154
+ },
155
+ },
156
+ },
157
+ "post_class": {
158
+ "bsonType": "object",
159
+ "description": "Post-class segment data",
160
+ "properties": {
161
+ "assignments": {
162
+ "bsonType": "array",
163
+ "description": "List of assignments",
164
+ "items": {
165
+ "bsonType": "object",
166
+ "required": ["id", "title", "due_date", "status"],
167
+ "properties": {
168
+ "id": {
169
+ "bsonType": ["objectId", "int"],
170
+ "description": "Assignment ID",
171
+ },
172
+ "title": {
173
+ "bsonType": "string",
174
+ "description": "Title of the assignment",
175
+ },
176
+ "due_date": {
177
+ "bsonType": "date",
178
+ "description": "Due date of the assignment",
179
+ },
180
+ "status": {
181
+ "bsonType": "string",
182
+ "description": "Status of the assignment (e.g., pending, completed)",
183
+ },
184
+ "submissions": {
185
+ "bsonType": "array",
186
+ "description": "List of submissions",
187
+ "items": {
188
+ "bsonType": "object",
189
+ "properties": {
190
+ "student_id": {
191
+ "bsonType": "objectId",
192
+ "description": "ID of the student who submitted the assignment",
193
+ },
194
+ "file_url": {
195
+ "bsonType": "string",
196
+ "description": "URL of the submitted file",
197
+ },
198
+ "submitted_at": {
199
+ "bsonType": "date",
200
+ "description": "Date when the assignment was submitted",
201
+ },
202
+ },
203
+ },
204
+ },
205
+ },
206
+ },
207
+ }
208
+ },
209
+ },
210
+ },
211
+ },
212
+ },
213
+ },
214
+ }
215
+
216
+ # Update the schema using the collMod command
217
+ db.command({
218
+ "collMod": "courses_collection2",
219
+ "validator": {"$jsonSchema": updated_course_schema}
220
+ })
221
+
222
+ print("Schema updated successfully!")
new_keywords.py ADDED
@@ -0,0 +1,127 @@
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from pymongo import MongoClient
4
+ from dotenv import load_dotenv
5
+ import os
6
+ import json
7
+ import re
8
+
9
+ # 1. Load environment variables
10
+ load_dotenv()
11
+ MONGODB_URI = os.getenv(
12
+ "MONGODB_UR",
13
+ "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
14
+ )
15
+ # 2. Create MongoDB connection
16
+ client = MongoClient(MONGODB_URI)
17
+ db = client["novascholar_db"]
18
+ collection = db["research_papers"]
19
+
20
+
21
+ def convert_mixed_columns(df: pd.DataFrame) -> pd.DataFrame:
22
+ """
23
+ Convert any columns that contain lists into comma-separated strings
24
+ to ensure consistent data types for CSV export.
25
+ """
26
+ for col in df.columns:
27
+ if any(isinstance(val, list) for val in df[col].dropna()):
28
+ df[col] = df[col].apply(
29
+ lambda x: (
30
+ ", ".join(map(str, x))
31
+ if isinstance(x, list)
32
+ else (str(x) if pd.notna(x) else "")
33
+ )
34
+ )
35
+ return df
36
+
37
+
38
+ def filter_and_export_collection_to_csv(keywords_list, doc_collection):
39
+ """
40
+ Fetch documents from the specified collection where the 'Keywords' field
41
+ matches ANY of the keywords in 'keywords_list'. Convert to DataFrame,
42
+ ensure consistent column types, save to CSV, and return the DataFrame
43
+ and CSV filename.
44
+ """
45
+ # 3. Retrieve filtered documents from the collection based on 'Keywords' using $in with regex for substring matching
46
+ regex_keywords = [f".*{keyword}.*" for keyword in keywords_list]
47
+ docs = list(
48
+ doc_collection.find(
49
+ {"Keywords": {"$regex": "|".join(regex_keywords), "$options": "i"}}
50
+ )
51
+ )
52
+
53
+ # Convert documents to DataFrame
54
+ df = pd.DataFrame(docs)
55
+
56
+ if not df.empty:
57
+ # 4. Convert mixed columns
58
+ df = convert_mixed_columns(df)
59
+ # 5. Export to CSV
60
+ csv_filename = "filtered_papers_export.csv"
61
+ df.to_csv(csv_filename, index=False)
62
+ return df, csv_filename
63
+ else:
64
+ # Return an empty DataFrame and None if no documents found
65
+ return pd.DataFrame(), None
66
+
67
+
68
+ def main():
69
+ st.title("Filter and Export Papers by Keyword")
70
+
71
+ # Let user select the paper type
72
+ paper_type = st.selectbox(
73
+ "Select type of research paper:",
74
+ [
75
+ "Review Based Paper",
76
+ "Opinion/Perspective Based Paper",
77
+ "Empirical Research Paper",
78
+ "Research Paper (Other)",
79
+ ],
80
+ )
81
+
82
+ # Let user enter the keyword to filter
83
+ keyword_input = st.text_input(
84
+ "Enter the exact keyword to filter papers by 'Keywords' field:"
85
+ )
86
+
87
+ # When user clicks button, use the collection for the selected paper type
88
+ if st.button("Export Filtered Papers to CSV"):
89
+ with st.spinner("Exporting filtered documents..."):
90
+ try:
91
+ # Determine dynamic collection based on paper type
92
+ collection_name = paper_type.replace(" ", "_").lower()
93
+ doc_collection = db[collection_name]
94
+
95
+ # Split keywords by commas and strip whitespace
96
+ keywords_list = [
97
+ kw.strip() for kw in keyword_input.split(",") if kw.strip()
98
+ ]
99
+
100
+ if not keywords_list:
101
+ st.warning("Please enter at least one keyword.")
102
+ else:
103
+ df, csv_filename = filter_and_export_collection_to_csv(
104
+ keywords_list, doc_collection
105
+ )
106
+ if not df.empty and csv_filename:
107
+ st.success(
108
+ f"Successfully exported filtered papers to {csv_filename}!"
109
+ )
110
+ st.download_button(
111
+ label="Download CSV",
112
+ data=df.to_csv(index=False).encode("utf-8"),
113
+ file_name=csv_filename,
114
+ mime="text/csv",
115
+ )
116
+ st.write("Preview of the filtered DataFrame:")
117
+ st.dataframe(df)
118
+ else:
119
+ st.warning(
120
+ "No matching documents found for the provided keyword(s)."
121
+ )
122
+ except Exception as e:
123
+ st.error(f"Error exporting filtered papers: {str(e)}")
124
+
125
+
126
+ if __name__ == "__main__":
127
+ main()
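
For clarity, the OR-of-regexes query that filter_and_export_collection_to_csv builds from a comma-separated input looks like this (the keywords are illustrative):

    keywords_list = ["flipped classroom", "knowledge graph"]
    regex_keywords = [f".*{kw}.*" for kw in keywords_list]
    query = {"Keywords": {"$regex": "|".join(regex_keywords), "$options": "i"}}
    print(query)
    # {'Keywords': {'$regex': '.*flipped classroom.*|.*knowledge graph.*', '$options': 'i'}}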
new_research_paper.py ADDED
@@ -0,0 +1,103 @@
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import requests
4
+ import json
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
11
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
12
+
13
+
14
+ def call_perplexity_api(prompt: str) -> str:
15
+ """Call Perplexity AI with a prompt, return the text response if successful."""
16
+ headers = {
17
+ "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
18
+ "Content-Type": "application/json",
19
+ }
20
+
21
+ payload = {
22
+ "model": "llama-3.1-sonar-small-128k-chat",
23
+ "messages": [{"role": "user", "content": prompt}],
24
+ "temperature": 0.3,
25
+ }
26
+
27
+ try:
28
+ response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
29
+ response.raise_for_status()
30
+ return response.json()["choices"][0]["message"]["content"]
31
+ except Exception as e:
32
+ st.error(f"API Error: {str(e)}")
33
+ return ""
34
+
35
+
36
+ def generate_research_paper(df: pd.DataFrame) -> dict:
37
+ """
38
+ For each column in the DataFrame, generate a research paper section (100-150 words)
39
+ that addresses the data in that column. Return a dict mapping column -> text.
40
+ """
41
+ paper_sections = {}
42
+ for col in df.columns:
43
+ # Convert all non-null rows in the column to strings and join them for context
44
+ col_values = df[col].dropna().astype(str).tolist()
45
+ # We'll truncate if this is huge
46
+ sample_text = " | ".join(col_values[:50]) # limit to first 50 rows for brevity
47
+ prompt = f"""
48
+ Topic: {col}
49
+ Data Sample: {sample_text}
50
+
51
+ Generate a professional research paper section for the above column.
52
+ The section should be at least 100 words and at most 150 words,
53
+ focusing on key insights, challenges, and potential research angles.
54
+ Integrate the data samples as context for the content.
55
+ """
56
+ section_text = call_perplexity_api(prompt)
57
+ paper_sections[col] = section_text.strip() if section_text else ""
58
+ return paper_sections
59
+
60
+
61
+ def format_paper(paper_dict: dict) -> str:
62
+ """
63
+ Format the generated paper into a Markdown string.
64
+ Each column name is used as a heading, and the text is placed under it.
65
+ """
66
+ md_text = "# Generated Research Paper\n\n"
67
+ for col, content in paper_dict.items():
68
+ md_text += f"## {col}\n{content}\n\n"
69
+ return md_text
70
+
71
+
72
+ def main():
73
+ st.title("Corpus-based Research Paper Generator")
74
+
75
+ uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
76
+ if uploaded_file:
77
+ df = pd.read_csv(uploaded_file)
78
+ st.write("### Preview of Uploaded Data")
79
+ st.dataframe(df.head())
80
+
81
+ if st.button("Generate Research Paper"):
82
+ st.info("Generating paper based on the columns of your corpus...")
83
+ with st.spinner("Calling Perplexity AI..."):
84
+ paper = generate_research_paper(df)
85
+ if paper:
86
+ formatted_paper = format_paper(paper)
87
+ st.success("Research Paper Generated Successfully!")
88
+ st.write(formatted_paper)
89
+
90
+ st.download_button(
91
+ label="Download Paper as Markdown",
92
+ data=formatted_paper,
93
+ file_name="research_paper.md",
94
+ mime="text/markdown",
95
+ )
96
+ else:
97
+ st.error(
98
+ "Paper generation failed. Please check Perplexity API key."
99
+ )
100
+
101
+
102
+ if __name__ == "__main__":
103
+ main()
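
The request that call_perplexity_api sends can be reproduced outside Streamlit; a minimal sketch mirroring the payload and response path used above (the API key is a placeholder):

    import requests

    url = "https://api.perplexity.ai/chat/completions"
    headers = {
        "Authorization": "Bearer <PERPLEXITY_API_KEY>",   # placeholder key
        "Content-Type": "application/json",
    }
    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": "Summarize flipped-classroom research in two sentences."}],
        "temperature": 0.3,
    }
    response = requests.post(url, headers=headers, json=payload)
    response.raise_for_status()
    print(response.json()["choices"][0]["message"]["content"])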
poll_db_operations.py ADDED
@@ -0,0 +1,70 @@
1
+ from pymongo import MongoClient
2
+ from datetime import datetime
3
+ from bson import ObjectId
4
+ from dotenv import load_dotenv
5
+ import os
6
+
7
+ load_dotenv()
8
+ MONGO_URI = os.getenv('MONGO_URI')
9
+ class PollDatabase:
10
+ def __init__(self):
11
+ self.client = MongoClient(MONGO_URI)
12
+ self.db = self.client["novascholar_db"]
13
+
14
+ def create_poll(self, course_id, session_id, question, options, faculty_id):
15
+ """Create a new poll"""
16
+ poll = {
17
+ "course_id": course_id,
18
+ "session_id": session_id,
19
+ "faculty_id": faculty_id,
20
+ "question": question,
21
+ "options": options,
22
+ "status": "active",
23
+ "created_at": datetime.now(),
24
+ "responses": {option: 0 for option in options}
25
+ }
26
+ return self.db.polls.insert_one(poll)
27
+
28
+ def get_active_polls(self, session_id):
29
+ """Get all active polls for a session"""
30
+ return list(self.db.polls.find({
31
+ "session_id": session_id,
32
+ "status": "active"
33
+ }))
34
+
35
+ def submit_response(self, poll_id, student_id, selected_option):
36
+ """Submit a student's response to a poll"""
37
+ try:
38
+ # Record individual response
39
+ response = {
40
+ "poll_id": poll_id,
41
+ "student_id": student_id,
42
+ "selected_option": selected_option,
43
+ "submitted_at": datetime.now()
44
+ }
45
+ self.db.poll_responses.insert_one(response)
46
+
47
+ # Update aggregated results
48
+ self.db.polls.update_one(
49
+ {"_id": ObjectId(poll_id)},
50
+ {"$inc": {f"responses.{selected_option}": 1}}
51
+ )
52
+ return True, "Vote recorded successfully"
53
+
54
+ except Exception as e:
55
+ if "duplicate key error" in str(e):
56
+ return False, "You have already voted in this poll"
57
+ return False, f"Error recording vote: {str(e)}"
58
+
59
+ def close_poll(self, poll_id):
60
+ """Close a poll"""
61
+ return self.db.polls.update_one(
62
+ {"_id": ObjectId(poll_id)},
63
+ {"$set": {"status": "closed"}}
64
+ )
65
+
66
+ def get_poll_analytics(self, poll_id):
67
+ """Get detailed analytics for a poll"""
68
+ poll = self.db.polls.find_one({"_id": ObjectId(poll_id)})
69
+ responses = self.db.poll_responses.find({"poll_id": ObjectId(poll_id)})
70
+ return poll, list(responses)
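
The vote tally in submit_response relies on an atomic $inc so concurrent submissions never race on a read-modify-write; a small standalone sketch (the URI and ids are stand-ins):

    from bson import ObjectId
    from pymongo import MongoClient

    db = MongoClient("mongodb://localhost:27017")["novascholar_db"]   # placeholder URI
    poll_id = ObjectId()                                              # stand-in for a real poll _id
    selected_option = "Option A"

    # Atomically bump the counter for the chosen option on the poll document
    db.polls.update_one({"_id": poll_id}, {"$inc": {f"responses.{selected_option}": 1}})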
poll_db_setup.py ADDED
@@ -0,0 +1,35 @@
1
+ from pymongo import MongoClient
2
+ from datetime import datetime
3
+ from dotenv import load_dotenv
4
+ import os
5
+
6
+ load_dotenv()
7
+ MONGO_URI = os.getenv('MONGO_URI')
8
+ def setup_mongodb():
9
+ """Initialize MongoDB connection and create collections with indexes"""
10
+ client = MongoClient(MONGO_URI)
11
+ db = client["novascholar_db"]
12
+
13
+ # Create indexes for polls collection
14
+ db.polls.create_index([("session_id", 1), ("status", 1)])
15
+ db.polls.create_index([("course_id", 1)])
16
+
17
+ # Create unique index for poll_responses to prevent duplicate votes
18
+ db.poll_responses.create_index(
19
+ [("poll_id", 1), ("student_id", 1)],
20
+ unique=True
21
+ )
22
+
23
+ return "Database setup completed successfully"
24
+
25
+ def print_all_polls():
26
+ """Print all polls in the database"""
27
+ client = MongoClient(MONGO_URI)
28
+ db = client["novascholar_db"]
29
+
30
+ polls = db.polls.find()
31
+ for poll in polls:
32
+ print(poll)
33
+
34
+ if __name__ == "__main__":
35
+ print_all_polls()
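
The unique compound index created in setup_mongodb is what turns a second vote by the same student into a DuplicateKeyError; a hedged sketch of that behaviour (it assumes setup_mongodb() has already run, and the ids are stand-ins):

    from bson import ObjectId
    from pymongo import MongoClient, errors

    db = MongoClient(MONGO_URI)["novascholar_db"]
    vote = {"poll_id": ObjectId(), "student_id": ObjectId(), "selected_option": "Option A"}

    db.poll_responses.insert_one(dict(vote))
    try:
        db.poll_responses.insert_one(dict(vote))   # same (poll_id, student_id) pair
    except errors.DuplicateKeyError:
        print("You have already voted in this poll")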
pre_class_analytics2.py ADDED
@@ -0,0 +1,759 @@
1
+ import json
2
+ import typing_extensions as typing
3
+ import google.generativeai as genai
4
+ from typing import List, Dict, Any
5
+ import numpy as np
6
+ from collections import defaultdict
7
+
8
+ from dotenv import load_dotenv
9
+ import os
10
+ import pymongo
11
+ from pymongo import MongoClient
12
+
13
+ load_dotenv()
14
+ GEMINI_API_KEY = os.getenv('GEMINI_KEY')
15
+
16
+ class EngagementMetrics(typing.TypedDict):
17
+ participation_level: str # "high" | "medium" | "low"
18
+ question_quality: str # "advanced" | "intermediate" | "basic"
19
+ concept_understanding: str # "strong" | "moderate" | "needs_improvement"
20
+
21
+ class StudentInsight(typing.TypedDict):
22
+ student_id: str
23
+ performance_level: str # "high_performer" | "average" | "at_risk"
24
+ struggling_topics: list[str]
25
+ engagement_metrics: EngagementMetrics
26
+
27
+ class TopicInsight(typing.TypedDict):
28
+ topic: str
29
+ difficulty_level: float # 0 to 1
30
+ student_count: int
31
+ common_issues: list[str]
32
+ key_misconceptions: list[str]
33
+
34
+ class RecommendedAction(typing.TypedDict):
35
+ action: str
36
+ priority: str # "high" | "medium" | "low"
37
+ target_group: str # "all_students" | "specific_students" | "faculty"
38
+ reasoning: str
39
+ expected_impact: str
40
+
41
+ class ClassDistribution(typing.TypedDict):
42
+ high_performers: float
43
+ average_performers: float
44
+ at_risk: float
45
+
46
+ class CourseHealth(typing.TypedDict):
47
+ overall_engagement: float # 0 to 1
48
+ critical_topics: list[str]
49
+ class_distribution: ClassDistribution
50
+
51
+ class InterventionMetrics(typing.TypedDict):
52
+ immediate_attention_needed: list[str] # student_ids
53
+ monitoring_required: list[str] # student_ids
54
+
55
+ class AnalyticsResponse(typing.TypedDict):
56
+ topic_insights: list[TopicInsight]
57
+ student_insights: list[StudentInsight]
58
+ recommended_actions: list[RecommendedAction]
59
+ course_health: CourseHealth
60
+ intervention_metrics: InterventionMetrics
61
+
62
+
63
+
64
+ class NovaScholarAnalytics:
65
+ def __init__(self, model_name: str = "gemini-1.5-flash"):
66
+ genai.configure(api_key=GEMINI_API_KEY)
67
+ self.model = genai.GenerativeModel(model_name)
68
+
69
+ def _create_analytics_prompt(self, chat_histories: List[Dict], all_topics: List[str]) -> str:
70
+ """Creates a structured prompt for Gemini to analyze chat histories."""
71
+ # Prompt 1:
72
+ # return f"""Analyze these student chat histories for a university course and provide detailed analytics.
73
+
74
+ # Context:
75
+ # - These are pre-class chat interactions between students and an AI tutor
76
+ # - Topics covered: {', '.join(all_topics)}
77
+
78
+ # Chat histories: {json.dumps(chat_histories, indent=2)}
79
+
80
+ # Return the analysis in JSON format matching this exact schema:
81
+ # {AnalyticsResponse.__annotations__}
82
+
83
+ # Ensure all numeric values are between 0 and 1 (accuracy upto 3 decimal places) where applicable.
84
+
85
+ # Important analysis guidelines:
86
+ # 1. Identify topics where students show confusion or ask multiple follow-up questions
87
+ # 2. Look for patterns in question types and complexity
88
+ # 3. Analyze response understanding based on follow-up questions
89
+ # 4. Consider both explicit and implicit signs of difficulty
90
+ # 5. Focus on concept relationships and prerequisite understanding"""
91
+
92
+ # Prompt 2:
93
+ # return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
94
+
95
+ # Context:
96
+ # - Chat histories: {json.dumps(chat_histories, indent=2)}
97
+ # - These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
98
+ # - Topics covered: {', '.join(all_topics)}.
99
+
100
+ # Your task is to extract key insights that will help faculty address challenges effectively and enhance learning outcomes.
101
+
102
+ # Output Format:
103
+ # 1. Topics where students face significant difficulties:
104
+ # - Provide a ranked list of topics where the majority of students are struggling, based on the frequency and nature of their questions or misconceptions.
105
+ # - Include the percentage of students who found each topic challenging.
106
+
107
+ # 2. AI-recommended actions for faculty:
108
+ # - Suggest actionable steps to address the difficulties identified in each critical topic.
109
+ # - Specify the priority of each action (high, medium, low) based on the urgency and impact.
110
+ # - Explain the reasoning behind each recommendation and its expected impact on student outcomes.
111
+
112
+ # 3. Student-specific analytics (focusing on at-risk students):
113
+ # - Identify students categorized as "at-risk" based on their engagement levels, question complexity, and recurring struggles.
114
+ # - For each at-risk student, list their top 3 struggling topics and their engagement metrics (participation level, concept understanding).
115
+ # - Provide personalized recommendations for improving their understanding.
116
+
117
+ # Guidelines for Analysis:
118
+ # - Focus on actionable and concise insights rather than exhaustive details.
119
+ # - Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
120
+ # - Prioritize topics with higher difficulty scores or more students struggling.
121
+ # - Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
122
+
123
+ # The response must be well-structured, concise, and highly actionable for faculty to implement improvements effectively."""
124
+
125
+ # Prompt 3:
126
+ return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
127
+ Context:
128
+ - Chat histories: {json.dumps(chat_histories, indent=2)}
129
+ - These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
130
+ - Topics covered: {', '.join(all_topics)}.
131
+
132
+ Your task is to provide detailed analytics that will help faculty address challenges effectively and enhance learning outcomes.
133
+
134
+ Output Format (strictly follow this JSON structure):
135
+ {{
136
+ "topic_wise_insights": [
137
+ {{
138
+ "topic": "<string>",
139
+ "struggling_percentage": <number between 0 and 1>,
140
+ "key_issues": ["<string>", "<string>", ...],
141
+ "key_misconceptions": ["<string>", "<string>", ...],
142
+ "recommended_actions": {{
143
+ "description": "<string>",
144
+ "priority": "high|medium|low",
145
+ "expected_outcome": "<string>"
146
+ }}
147
+ }}
148
+ ],
149
+ "ai_recommended_actions": [
150
+ {{
151
+ "action": "<string>",
152
+ "priority": "high|medium|low",
153
+ "reasoning": "<string>",
154
+ "expected_outcome": "<string>",
155
+ "pedagogy_recommendations": {{
156
+ "methods": ["<string>", "<string>", ...],
157
+ "resources": ["<string>", "<string>", ...],
158
+ "expected_impact": "<string>"
159
+ }}
160
+ }}
161
+ ],
162
+ "student_analytics": [
163
+ {{
164
+ "student_id": "<string>",
165
+ "engagement_metrics": {{
166
+ "participation_level": <number between 0 and 1>,
167
+ "concept_understanding": "strong|moderate|needs_improvement",
168
+ "question_quality": "advanced|intermediate|basic"
169
+ }},
170
+ "struggling_topics": ["<string>", "<string>", ...],
171
+ "personalized_recommendation": "<string>"
172
+ }}
173
+ ]
174
+ }}
175
+
176
+ Guidelines for Analysis:
177
+ - Focus on actionable and concise insights rather than exhaustive details.
178
+ - Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
179
+ - Prioritize topics with higher difficulty scores or more students struggling.
180
+ - Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
181
+ - Make sure to include All** students in the analysis, not just a subset.
182
+ - for the ai_recommended_actions:
183
+ - Prioritize pedagogy recommendations for critical topics with the high difficulty scores or struggling percentages.
184
+ - For each action:
185
+ - Include specific teaching methods (e.g., interactive discussions or quizzes, problem-based learning, practical examples etc).
186
+ - Recommend supporting resources (e.g., videos, handouts, simulations).
187
+ - Provide reasoning for the recommendation and the expected outcomes for student learning.
188
+ - Example:
189
+ - **Action:** Conduct an interactive problem-solving session on "<Topic Name>".
190
+ - **Reasoning:** Students showed difficulty in applying concepts to practical problems.
191
+ - **Expected Outcome:** Improved practical understanding and application of the topic.
192
+ - **Pedagogy Recommendations:**
193
+ - **Methods:** Group discussions, real-world case studies.
194
+ - **Resources:** Online interactive tools, relevant case studies, video walkthroughs.
195
+ - **Expected Impact:** Enhance conceptual clarity by 40% and practical application by 30%.
196
+
197
+ The response must adhere strictly to the above JSON structure, with all fields populated appropriately."""
198
+
199
+
200
+ def _calculate_class_distribution(self, analytics: Dict) -> Dict:
201
+ """Calculate the distribution of students across performance levels."""
202
+ try:
203
+ total_students = len(analytics.get("student_insights", []))
204
+ if total_students == 0:
205
+ return {
206
+ "high_performers": 0,
207
+ "average_performers": 0,
208
+ "at_risk": 0
209
+ }
210
+
211
+ distribution = defaultdict(int)
212
+
213
+ for student in analytics.get("student_insights", []):
214
+ performance_level = student.get("performance_level", "average")
215
+ # Map performance levels to our three categories
216
+ if performance_level in ["excellent", "high", "high_performer"]:
217
+ distribution["high_performers"] += 1
218
+ elif performance_level in ["struggling", "low", "at_risk"]:
219
+ distribution["at_risk"] += 1
220
+ else:
221
+ distribution["average_performers"] += 1
222
+
223
+ # Convert to percentages
224
+ return {
225
+ level: count/total_students
226
+ for level, count in distribution.items()
227
+ }
228
+ except Exception as e:
229
+ print(f"Error calculating class distribution: {str(e)}")
230
+ return {
231
+ "high_performers": 0,
232
+ "average_performers": 0,
233
+ "at_risk": 0
234
+ }
235
+
236
+ def _identify_urgent_cases(self, analytics: Dict) -> List[str]:
237
+ """Identify students needing immediate attention."""
238
+ try:
239
+ urgent_cases = []
240
+ for student in analytics.get("student_insights", []):
241
+ student_id = student.get("student_id")
242
+ if not student_id:
243
+ continue
244
+
245
+ # Check multiple risk factors
246
+ risk_factors = 0
247
+
248
+ # Factor 1: Performance level
249
+ if student.get("performance_level") in ["struggling", "at_risk", "low"]:
250
+ risk_factors += 1
251
+
252
+ # Factor 2: Number of struggling topics
253
+ if len(student.get("struggling_topics", [])) >= 2:
254
+ risk_factors += 1
255
+
256
+ # Factor 3: Engagement metrics
257
+ engagement = student.get("engagement_metrics", {})
258
+ if (engagement.get("participation_level") == "low" or
259
+ engagement.get("concept_understanding") == "needs_improvement"):
260
+ risk_factors += 1
261
+
262
+ # If student has multiple risk factors, add to urgent cases
263
+ if risk_factors >= 2:
264
+ urgent_cases.append(student_id)
265
+
266
+ return urgent_cases
267
+ except Exception as e:
268
+ print(f"Error identifying urgent cases: {str(e)}")
269
+ return []
270
+
271
+ def _identify_monitoring_cases(self, analytics: Dict) -> List[str]:
272
+ """Identify students who need monitoring but aren't urgent cases."""
273
+ try:
274
+ monitoring_cases = []
275
+ urgent_cases = set(self._identify_urgent_cases(analytics))
276
+
277
+ for student in analytics.get("student_insights", []):
278
+ student_id = student.get("student_id")
279
+ if not student_id or student_id in urgent_cases:
280
+ continue
281
+
282
+ # Check monitoring criteria
283
+ monitoring_needed = False
284
+
285
+ # Criterion 1: Has some struggling topics but not enough for urgent
286
+ if len(student.get("struggling_topics", [])) == 1:
287
+ monitoring_needed = True
288
+
289
+ # Criterion 2: Medium-low engagement
290
+ engagement = student.get("engagement_metrics", {})
291
+ if engagement.get("participation_level") == "medium":
292
+ monitoring_needed = True
293
+
294
+ # Criterion 3: Recent performance decline
295
+ if student.get("performance_level") == "average":
296
+ monitoring_needed = True
297
+
298
+ if monitoring_needed:
299
+ monitoring_cases.append(student_id)
300
+
301
+ return monitoring_cases
302
+ except Exception as e:
303
+ print(f"Error identifying monitoring cases: {str(e)}")
304
+ return []
305
+
306
+ def _identify_critical_topics(self, analytics: Dict) -> List[str]:
307
+ """
308
+ Identify critical topics that need attention based on multiple factors.
309
+ Returns a list of topic names that are considered critical.
310
+ """
311
+ try:
312
+ critical_topics = []
313
+ topics = analytics.get("topic_insights", [])
314
+
315
+ for topic in topics:
316
+ if not isinstance(topic, dict):
317
+ continue
318
+
319
+ # Initialize score for topic criticality
320
+ critical_score = 0
321
+
322
+ # Factor 1: High difficulty level
323
+ difficulty_level = topic.get("difficulty_level", 0)
324
+ if difficulty_level > 0.7:
325
+ critical_score += 2
326
+ elif difficulty_level > 0.5:
327
+ critical_score += 1
328
+
329
+ # Factor 2: Number of students struggling
330
+ student_count = topic.get("student_count", 0)
331
+ total_students = len(analytics.get("student_insights", []))
332
+ if total_students > 0:
333
+ struggle_ratio = student_count / total_students
334
+ if struggle_ratio > 0.5:
335
+ critical_score += 2
336
+ elif struggle_ratio > 0.3:
337
+ critical_score += 1
338
+
339
+ # Factor 3: Number of common issues
340
+ if len(topic.get("common_issues", [])) > 2:
341
+ critical_score += 1
342
+
343
+ # Factor 4: Number of key misconceptions
344
+ if len(topic.get("key_misconceptions", [])) > 1:
345
+ critical_score += 1
346
+
347
+ # If topic exceeds threshold, mark as critical
348
+ if critical_score >= 3:
349
+ critical_topics.append(topic.get("topic", "Unknown Topic"))
350
+
351
+ return critical_topics
352
+
353
+ except Exception as e:
354
+ print(f"Error identifying critical topics: {str(e)}")
355
+ return []
356
+
357
+ def _calculate_engagement(self, analytics: Dict) -> Dict:
358
+ """
359
+ Calculate detailed engagement metrics across all students.
360
+ Returns a dictionary with engagement statistics.
361
+ """
362
+ try:
363
+ total_students = len(analytics.get("student_insights", []))
364
+ if total_students == 0:
365
+ return {
366
+ "total_students": 0,
367
+ "overall_score": 0,
368
+ "engagement_distribution": {
369
+ "high": 0,
370
+ "medium": 0,
371
+ "low": 0
372
+ },
373
+ "participation_metrics": {
374
+ "average_topics_per_student": 0,
375
+ "active_participants": 0
376
+ }
377
+ }
378
+
379
+ engagement_levels = defaultdict(int)
380
+ total_topics_engaged = 0
381
+ active_participants = 0
382
+
383
+ for student in analytics.get("student_insights", []):
384
+ # Get engagement metrics
385
+ metrics = student.get("engagement_metrics", {})
386
+
387
+ # Calculate participation level
388
+ participation = metrics.get("participation_level", "low").lower()
389
+ engagement_levels[participation] += 1
390
+
391
+ # Count topics student is engaged with
392
+ topics_count = len(student.get("struggling_topics", []))
393
+ total_topics_engaged += topics_count
394
+
395
+ # Count active participants (students engaging with any topics)
396
+ if topics_count > 0:
397
+ active_participants += 1
398
+
399
+ # Calculate overall engagement score (0-1)
400
+ weighted_score = (
401
+ (engagement_levels["high"] * 1.0 +
402
+ engagement_levels["medium"] * 0.6 +
403
+ engagement_levels["low"] * 0.2) / total_students
404
+ )
405
+
406
+ return {
407
+ "total_students": total_students,
408
+ "overall_score": round(weighted_score, 2),
409
+ "engagement_distribution": {
410
+ level: count/total_students
411
+ for level, count in engagement_levels.items()
412
+ },
413
+ "participation_metrics": {
414
+ "average_topics_per_student": round(total_topics_engaged / total_students, 2),
415
+ "active_participants_ratio": round(active_participants / total_students, 2)
416
+ }
417
+ }
418
+
419
+ except Exception as e:
420
+ print(f"Error calculating engagement: {str(e)}")
421
+ return {
422
+ "total_students": 0,
423
+ "overall_score": 0,
424
+ "engagement_distribution": {
425
+ "high": 0,
426
+ "medium": 0,
427
+ "low": 0
428
+ },
429
+ "participation_metrics": {
430
+ "average_topics_per_student": 0,
431
+ "active_participants_ratio": 0
432
+ }
433
+ }
434
+
435
+ def _process_gemini_response(self, response: str) -> Dict:
436
+ """Process and validate Gemini's response."""
437
+ # try:
438
+ # analytics = json.loads(response)
439
+ # return self._enrich_analytics(analytics)
440
+ # except json.JSONDecodeError as e:
441
+ # print(f"Error decoding Gemini response: {e}")
442
+ # return self._fallback_analytics()
443
+ try:
444
+ # Parse JSON response
445
+ analytics = json.loads(response)
446
+
447
+ # Validate required fields exist
448
+ required_fields = {
449
+ "topic_insights": [],
450
+ "student_insights": [],
451
+ "recommended_actions": []
452
+ }
453
+
454
+ # Ensure all required fields exist with default values
455
+ for field, default_value in required_fields.items():
456
+ if field not in analytics or not analytics[field]:
457
+ analytics[field] = default_value
458
+
459
+ # Now enrich the validated analytics
460
+ return self._enrich_analytics(analytics)
461
+
462
+ except (json.JSONDecodeError, KeyError, TypeError) as e:
463
+ print(f"Error processing Gemini response: {str(e)}")
464
+ print(f"Raw response: {response}")
465
+ return self._fallback_analytics()
466
+
467
+ def _enrich_analytics(self, analytics: Dict) -> Dict:
468
+ """Add derived insights and metrics to the analytics."""
469
+ # Add overall course health metrics
470
+ analytics["course_health"] = {
471
+ "overall_engagement": self._calculate_engagement(analytics),
472
+ "critical_topics": self._identify_critical_topics(analytics),
473
+ "class_distribution": self._calculate_class_distribution(analytics)
474
+ }
475
+
476
+ # Add intervention urgency scores
477
+ analytics["intervention_metrics"] = {
478
+ "immediate_attention_needed": self._identify_urgent_cases(analytics),
479
+ "monitoring_required": self._identify_monitoring_cases(analytics)
480
+ }
481
+
482
+ return analytics
483
+
484
+ def _calculate_engagement(self, analytics: Dict) -> Dict:
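+ # Note: _calculate_engagement is defined twice in this class; Python keeps the later definition.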
485
+ # """Calculate overall engagement metrics."""
486
+ # total_students = len(analytics["student_insights"])
487
+ # engagement_levels = defaultdict(int)
488
+
489
+ # for student in analytics["student_insights"]:
490
+ # engagement_levels[student["engagement_metrics"]["participation_level"]] += 1
491
+
492
+ # return {
493
+ # "total_students": total_students,
494
+ # "engagement_distribution": {
495
+ # level: count/total_students
496
+ # for level, count in engagement_levels.items()
497
+ # }
498
+ # }
499
+ """Calculate overall engagement metrics with defensive programming."""
500
+ try:
501
+ total_students = len(analytics.get("student_insights", []))
502
+ if total_students == 0:
503
+ return {
504
+ "total_students": 0,
505
+ "engagement_distribution": {
506
+ "high": 0,
507
+ "medium": 0,
508
+ "low": 0
509
+ }
510
+ }
511
+
512
+ engagement_levels = defaultdict(int)
513
+
514
+ for student in analytics.get("student_insights", []):
515
+ metrics = student.get("engagement_metrics", {})
516
+ level = metrics.get("participation_level", "low")
517
+ engagement_levels[level] += 1
518
+
519
+ return {
520
+ "total_students": total_students,
521
+ "engagement_distribution": {
522
+ level: count/total_students
523
+ for level, count in engagement_levels.items()
524
+ }
525
+ }
526
+ except Exception as e:
527
+ print(f"Error calculating engagement: {str(e)}")
528
+ return {
529
+ "total_students": 0,
530
+ "engagement_distribution": {
531
+ "high": 0,
532
+ "medium": 0,
533
+ "low": 0
534
+ }
535
+ }
536
+
537
+ def _identify_critical_topics(self, analytics: Dict) -> List[Dict]:
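+ # Note: _identify_critical_topics is also defined twice; the later definition is the one that takes effect.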
538
+ # """Identify topics needing immediate attention."""
539
+ # return [
540
+ # topic for topic in analytics["topic_insights"]
541
+ # if topic["difficulty_level"] > 0.7 or
542
+ # len(topic["common_issues"]) > 2
543
+ # ]
544
+ """Identify topics needing immediate attention with defensive programming."""
545
+ try:
546
+ return [
547
+ topic for topic in analytics.get("topic_insights", [])
548
+ if topic.get("difficulty_level", 0) > 0.7 or
549
+ len(topic.get("common_issues", [])) > 2
550
+ ]
551
+ except Exception as e:
552
+ print(f"Error identifying critical topics: {str(e)}")
553
+ return []
554
+
555
+ def generate_analytics(self, chat_histories: List[Dict], all_topics: List[str]) -> Dict:
556
+ # Method 1: (caused key 'student_insights' error):
557
+ # """Main method to generate analytics from chat histories."""
558
+ # # Preprocess chat histories
559
+ # processed_histories = self._preprocess_chat_histories(chat_histories)
560
+
561
+ # # Create and send prompt to Gemini
562
+ # prompt = self._create_analytics_prompt(processed_histories, all_topics)
563
+ # response = self.model.generate_content(
564
+ # prompt,
565
+ # generation_config=genai.GenerationConfig(
566
+ # response_mime_type="application/json",
567
+ # response_schema=AnalyticsResponse
568
+ # )
569
+ # )
570
+
571
+ # # # Process and enrich analytics
572
+ # # analytics = self._process_gemini_response(response.text)
573
+ # # return analytics
574
+ # # Process, validate, and enrich the response
575
+ # analytics = self._process_gemini_response(response.text)
576
+
577
+ # # Then cast it to satisfy the type checker
578
+ # return typing.cast(AnalyticsResponse, analytics)
579
+
580
+ # Method 2 (possible fix):
581
+ # """Main method to generate analytics with better error handling."""
582
+ # try:
583
+ # processed_histories = self._preprocess_chat_histories(chat_histories)
584
+ # prompt = self._create_analytics_prompt(processed_histories, all_topics)
585
+
586
+ # response = self.model.generate_content(
587
+ # prompt,
588
+ # generation_config=genai.GenerationConfig(
589
+ # response_mime_type="application/json",
590
+ # temperature=0.15
591
+ # # response_schema=AnalyticsResponse
592
+ # )
593
+ # )
594
+
595
+ # if not response.text:
596
+ # print("Empty response from Gemini")
597
+ # return self._fallback_analytics()
598
+
599
+ # # analytics = self._process_gemini_response(response.text)
600
+ # # return typing.cast(AnalyticsResponse, analytics)
601
+ # # return response.text;
602
+ # analytics = json.loads(response.text)
603
+ # return analytics
604
+
605
+ # except Exception as e:
606
+ # print(f"Error generating analytics: {str(e)}")
607
+ # return self._fallback_analytics()
608
+
609
+
610
+ # Debugging code:
611
+ """Main method to generate analytics with better error handling."""
612
+ try:
613
+ # Debug print for input validation
614
+ print("Input validation:")
615
+ print(f"Chat histories: {len(chat_histories)} entries")
616
+ print(f"Topics: {all_topics}")
617
+
618
+ if not chat_histories or not all_topics:
619
+ print("Missing required input data")
620
+ return self._fallback_analytics()
621
+
622
+ # Debug the preprocessing step
623
+ try:
624
+ processed_histories = self._preprocess_chat_histories(chat_histories)
625
+ print("Successfully preprocessed chat histories")
626
+ except Exception as preprocess_error:
627
+ print(f"Error in preprocessing: {str(preprocess_error)}")
628
+ return self._fallback_analytics()
629
+
630
+ # Debug the prompt creation
631
+ try:
632
+ prompt = self._create_analytics_prompt(processed_histories, all_topics)
633
+ print("Successfully created prompt")
634
+ print("Prompt preview:", prompt[:200] + "...") # Print first 200 chars
635
+ except Exception as prompt_error:
636
+ print(f"Error in prompt creation: {str(prompt_error)}")
637
+ return self._fallback_analytics()
638
+
639
+ # Rest of the function remains the same
640
+ response = self.model.generate_content(
641
+ prompt,
642
+ generation_config=genai.GenerationConfig(
643
+ response_mime_type="application/json",
644
+ temperature=0.15
645
+ )
646
+ )
647
+
648
+ if not response.text:
649
+ print("Empty response from Gemini")
650
+ return self._fallback_analytics()
651
+
652
+ analytics = json.loads(response.text)
653
+ return analytics
654
+
655
+ except Exception as e:
656
+ print(f"Error generating analytics: {str(e)}")
657
+ print(f"Error type: {type(e)}")
658
+ import traceback
659
+ print("Full traceback:", traceback.format_exc())
660
+ return self._fallback_analytics()
661
+
662
+ def _preprocess_chat_histories(self, chat_histories: List[Dict]) -> List[Dict]:
663
+ # """Preprocess chat histories to focus on relevant information."""
664
+ # processed = []
665
+
666
+ # for chat in chat_histories:
667
+ # print(str(chat["user_id"]))
668
+ # processed_chat = {
669
+ # "user_id": str(chat["user_id"]),
670
+ # "messages": [
671
+ # {
672
+ # "prompt": msg["prompt"],
673
+ # "response": msg["response"]
674
+ # }
675
+ # for msg in chat["messages"]
676
+ # ]
677
+ # }
678
+ # processed.append(processed_chat)
679
+
680
+ # return processed
681
+
682
+ # Code 2:
683
+ """Preprocess chat histories to focus on relevant information."""
684
+ processed = []
685
+
686
+ for chat in chat_histories:
687
+ # Convert ObjectId to string if it's an ObjectId
688
+ user_id = str(chat["user_id"]["$oid"]) if isinstance(chat["user_id"], dict) and "$oid" in chat["user_id"] else str(chat["user_id"])
689
+
690
+ try:
691
+ processed_chat = {
692
+ "user_id": user_id,
693
+ "messages": [
694
+ {
695
+ "prompt": msg["prompt"],
696
+ "response": msg["response"]
697
+ }
698
+ for msg in chat["messages"]
699
+ ]
700
+ }
701
+ processed.append(processed_chat)
702
+ print(f"Successfully processed chat for user: {user_id}")
703
+ except Exception as e:
704
+ print(f"Error processing chat for user: {user_id}")
705
+ print(f"Error details: {str(e)}")
706
+ continue
707
+
708
+ return processed
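# Illustrative input shapes handled by the user_id normalization above
# (hypothetical ObjectId strings, not real data). Both documents come out
# of _preprocess_chat_histories with a plain-string "user_id":
#
#   {"user_id": {"$oid": "64f1c0ffe13823a6f1d52abc"}, "messages": [...]}
#   {"user_id": "64f1c0ffe13823a6f1d52abc", "messages": [...]}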
709
+
710
+ def _fallback_analytics(self) -> Dict:
711
+ # """Provide basic analytics in case of LLM processing failure."""
712
+ # return {
713
+ # "topic_insights": [],
714
+ # "student_insights": [],
715
+ # "recommended_actions": [
716
+ # {
717
+ # "action": "Review analytics generation process",
718
+ # "priority": "high",
719
+ # "target_group": "system_administrators",
720
+ # "reasoning": "Analytics generation failed",
721
+ # "expected_impact": "Restore analytics functionality"
722
+ # }
723
+ # ]
724
+ # }
725
+ """Provide comprehensive fallback analytics that match our schema."""
726
+ return {
727
+ "topic_insights": [],
728
+ "student_insights": [],
729
+ "recommended_actions": [
730
+ {
731
+ "action": "Review analytics generation process",
732
+ "priority": "high",
733
+ "target_group": "system_administrators",
734
+ "reasoning": "Analytics generation failed",
735
+ "expected_impact": "Restore analytics functionality"
736
+ }
737
+ ],
738
+ "course_health": {
739
+ "overall_engagement": 0,
740
+ "critical_topics": [],
741
+ "class_distribution": {
742
+ "high_performers": 0,
743
+ "average_performers": 0,
744
+ "at_risk": 0
745
+ }
746
+ },
747
+ "intervention_metrics": {
748
+ "immediate_attention_needed": [],
749
+ "monitoring_required": []
750
+ }
751
+ }
752
+
753
+ # if __name__ == "__main__":
754
+ # # Example usage
755
+
756
+
757
+ # analytics_generator = NovaScholarAnalytics()
758
+ # analytics = analytics_generator.generate_analytics(chat_histories, all_topics)
759
+ # print(json.dumps(analytics, indent=2))
pre_class_analytics4.py ADDED
@@ -0,0 +1,592 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from datetime import datetime
4
+ from typing import List, Dict, Any, Tuple
5
+ import spacy
6
+ from collections import Counter, defaultdict
7
+ from sklearn.feature_extraction.text import TfidfVectorizer
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+ from textblob import TextBlob
10
+ import networkx as nx
11
+ from scipy import stats
12
+ import logging
13
+ import json
14
+ from dataclasses import dataclass
15
+ from enum import Enum
16
+
17
+ # Configure logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
+ class TopicDifficulty(Enum):
22
+ EASY = "easy"
23
+ MODERATE = "moderate"
24
+ DIFFICULT = "difficult"
25
+ VERY_DIFFICULT = "very_difficult"
26
+
27
+
28
+ @dataclass
29
+ class QuestionMetrics:
30
+ complexity_score: float
31
+ follow_up_count: int
32
+ clarification_count: int
33
+ time_spent: float
34
+ sentiment_score: float
35
+
36
+ @dataclass
37
+ class TopicInsights:
38
+ difficulty_level: TopicDifficulty
39
+ common_confusion_points: List[str]
40
+ question_patterns: List[str]
41
+ time_distribution: Dict[str, float]
42
+ engagement_metrics: Dict[str, float]
43
+ recommended_focus_areas: List[str]
44
+
45
+ def to_dict(self):
46
+ return {
47
+ "difficulty_level": self.difficulty_level.value, # Convert enum to its value
48
+ "common_confusion_points": self.common_confusion_points,
49
+ "question_patterns": self.question_patterns,
50
+ "time_distribution": {str(k): v for k, v in self.time_distribution.items()},
51
+ "engagement_metrics": self.engagement_metrics,
52
+ "recommended_focus_areas": self.recommended_focus_areas,
53
+ }
54
+
55
+ class PreClassAnalytics:
56
+ def __init__(self, nlp_model: str = "en_core_web_lg"):
57
+ """Initialize the analytics system with necessary components."""
58
+ self.nlp = spacy.load(nlp_model)
59
+ self.question_indicators = {
60
+ "what", "why", "how", "when", "where", "which", "who",
61
+ "whose", "whom", "can", "could", "would", "will", "explain"
62
+ }
63
+ self.confusion_indicators = {
64
+ "confused", "don't understand", "unclear", "not clear",
65
+ "stuck", "difficult", "hard", "help", "explain again"
66
+ }
67
+ self.follow_up_indicators = {
68
+ "also", "another", "additionally", "furthermore", "moreover",
69
+ "besides", "related", "similarly", "again"
70
+ }
71
+
72
+ def preprocess_chat_history(self, chat_history: List[Dict]) -> pd.DataFrame:
73
+ """Convert chat history to DataFrame with enhanced features."""
74
+ messages = []
75
+ for chat in chat_history:
76
+ user_id = chat['user_id']['$oid']
77
+ for msg in chat['messages']:
78
+ try:
79
+ # Ensure the timestamp is in the correct format
80
+ if isinstance(msg['timestamp'], dict) and '$date' in msg['timestamp']:
81
+ timestamp = pd.to_datetime(msg['timestamp']['$date'])
82
+ elif isinstance(msg['timestamp'], str):
83
+ timestamp = pd.to_datetime(msg['timestamp'])
84
+ else:
85
+ raise ValueError("Invalid timestamp format")
86
+ except Exception as e:
87
+ print(f"Error parsing timestamp: {msg['timestamp']}, error: {e}")
88
+ timestamp = pd.NaT # Use NaT (Not a Time) for invalid timestamps
89
+
90
+ messages.append({
91
+ 'user_id': user_id,
92
+ 'timestamp': timestamp,
93
+ 'prompt': msg['prompt'],
94
+ 'response': msg['response'],
95
+ 'is_question': any(q in msg['prompt'].lower() for q in self.question_indicators),
96
+ 'shows_confusion': any(c in msg['prompt'].lower() for c in self.confusion_indicators),
97
+ 'is_followup': any(f in msg['prompt'].lower() for f in self.follow_up_indicators)
98
+ })
99
+
100
+ df = pd.DataFrame(messages)
101
+ df['sentiment'] = df['prompt'].apply(lambda x: TextBlob(x).sentiment.polarity)
102
+ return df
103
+
104
+ def extract_topic_hierarchies(self, df: pd.DataFrame) -> Dict[str, List[str]]:
105
+ """Extract hierarchical topic relationships from conversations."""
106
+ topic_hierarchy = defaultdict(list)
107
+
108
+ for _, row in df.iterrows():
109
+ doc = self.nlp(row['prompt'])
110
+
111
+ # Extract main topics and subtopics using noun chunks and dependencies
112
+ main_topics = []
113
+ subtopics = []
114
+
115
+ for chunk in doc.noun_chunks:
116
+ if chunk.root.dep_ in ('nsubj', 'dobj'):
117
+ main_topics.append(chunk.text.lower())
118
+ else:
119
+ subtopics.append(chunk.text.lower())
120
+
121
+ # Build hierarchy
122
+ for main_topic in main_topics:
123
+ topic_hierarchy[main_topic].extend(subtopics)
124
+
125
+ # Clean and deduplicate
126
+ return {k: list(set(v)) for k, v in topic_hierarchy.items()}
127
+
128
+ def analyze_topic_difficulty(self, df: pd.DataFrame, topic: str) -> TopicDifficulty:
129
+ """Determine topic difficulty based on various metrics."""
130
+ topic_msgs = df[df['prompt'].str.contains(topic, case=False)]
131
+
132
+ # Calculate difficulty indicators
133
+ confusion_rate = topic_msgs['shows_confusion'].mean()
134
+ question_rate = topic_msgs['is_question'].mean()
135
+ follow_up_rate = topic_msgs['is_followup'].mean()
136
+ avg_sentiment = topic_msgs['sentiment'].mean()
137
+
138
+ # Calculate composite difficulty score
139
+ difficulty_score = (
140
+ confusion_rate * 0.4 +
141
+ question_rate * 0.3 +
142
+ follow_up_rate * 0.2 +
143
+ (1 - (avg_sentiment + 1) / 2) * 0.1
144
+ )
145
+
146
+ # Map score to difficulty level
147
+ if difficulty_score < 0.3:
148
+ return TopicDifficulty.EASY
149
+ elif difficulty_score < 0.5:
150
+ return TopicDifficulty.MODERATE
151
+ elif difficulty_score < 0.7:
152
+ return TopicDifficulty.DIFFICULT
153
+ else:
154
+ return TopicDifficulty.VERY_DIFFICULT
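# Worked example of the weighting above (illustrative numbers, not from real data):
# confusion_rate=0.5, question_rate=0.6, follow_up_rate=0.2, avg_sentiment=0.0 gives
# 0.5*0.4 + 0.6*0.3 + 0.2*0.2 + (1 - 0.5)*0.1 = 0.20 + 0.18 + 0.04 + 0.05 = 0.47,
# which falls in the 0.3-0.5 band and maps to TopicDifficulty.MODERATE.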
155
+
156
+ def identify_confusion_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
157
+ """Identify common patterns in student confusion."""
158
+ confused_msgs = df[
159
+ (df['prompt'].str.contains(topic, case=False)) &
160
+ (df['shows_confusion'])
161
+ ]['prompt']
162
+
163
+ patterns = []
164
+ for msg in confused_msgs:
165
+ doc = self.nlp(msg)
166
+
167
+ # Extract key phrases around confusion indicators
168
+ for sent in doc.sents:
169
+ for token in sent:
170
+ if token.text.lower() in self.confusion_indicators:
171
+ # Get context window around confusion indicator
172
+ context = sent.text
173
+ patterns.append(context)
174
+
175
+ # Group similar patterns
176
+ if patterns:
177
+ vectorizer = TfidfVectorizer(ngram_range=(1, 3))
178
+ tfidf_matrix = vectorizer.fit_transform(patterns)
179
+ similarity_matrix = cosine_similarity(tfidf_matrix)
180
+
181
+ # Cluster similar patterns
182
+ G = nx.Graph()
183
+ for i in range(len(patterns)):
184
+ for j in range(i + 1, len(patterns)):
185
+ if similarity_matrix[i][j] > 0.5: # Similarity threshold
186
+ G.add_edge(i, j)
187
+
188
+ # Extract representative patterns from each cluster
189
+ clusters = list(nx.connected_components(G))
190
+ return [patterns[min(cluster)] for cluster in clusters]
191
+
192
+ return []
193
+
194
+ def analyze_question_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
195
+ """Analyze patterns in student questions about the topic."""
196
+ topic_questions = df[
197
+ (df['prompt'].str.contains(topic, case=False)) &
198
+ (df['is_question'])
199
+ ]['prompt']
200
+
201
+ question_types = defaultdict(list)
202
+ for question in topic_questions:
203
+ doc = self.nlp(question)
204
+
205
+ # Categorize questions
206
+ if any(token.text.lower() in {"what", "define", "explain"} for token in doc):
207
+ question_types["conceptual"].append(question)
208
+ elif any(token.text.lower() in {"how", "steps", "process"} for token in doc):
209
+ question_types["procedural"].append(question)
210
+ elif any(token.text.lower() in {"why", "reason", "because"} for token in doc):
211
+ question_types["reasoning"].append(question)
212
+ else:
213
+ question_types["other"].append(question)
214
+
215
+ # Extract patterns from each category
216
+ patterns = []
217
+ for category, questions in question_types.items():
218
+ if questions:
219
+ vectorizer = TfidfVectorizer(ngram_range=(1, 3))
220
+ tfidf_matrix = vectorizer.fit_transform(questions)
221
+
222
+ # Get most representative questions
223
+ feature_array = np.mean(tfidf_matrix.toarray(), axis=0)
224
+ tfidf_sorting = np.argsort(feature_array)[::-1]
225
+ features = vectorizer.get_feature_names_out()
226
+
227
+ patterns.append(f"{category}: {' '.join(features[tfidf_sorting[:3]])}")
228
+
229
+ return patterns
230
+
231
+ def analyze_time_distribution(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
232
+ """Analyze time spent on different aspects of the topic."""
233
+ topic_msgs = df[df['prompt'].str.contains(topic, case=False)].copy()
234
+ if len(topic_msgs) < 2:
235
+ return {}
236
+
237
+ topic_msgs['time_diff'] = topic_msgs['timestamp'].diff()
238
+
239
+ # Calculate time distribution
240
+ distribution = {
241
+ 'total_time': topic_msgs['time_diff'].sum().total_seconds() / 60,
242
+ 'avg_time_per_message': topic_msgs['time_diff'].mean().total_seconds() / 60,
243
+ 'max_time_gap': topic_msgs['time_diff'].max().total_seconds() / 60,
244
+ 'time_spent_on_questions': topic_msgs[topic_msgs['is_question']]['time_diff'].sum().total_seconds() / 60,
245
+ 'time_spent_on_confusion': topic_msgs[topic_msgs['shows_confusion']]['time_diff'].sum().total_seconds() / 60
246
+ }
247
+
248
+ return distribution
249
+
250
+ def calculate_engagement_metrics(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
251
+ """Calculate student engagement metrics for the topic."""
252
+ topic_msgs = df[df['prompt'].str.contains(topic, case=False)]
253
+
254
+ metrics = {
255
+ 'message_count': len(topic_msgs),
256
+ 'question_ratio': topic_msgs['is_question'].mean(),
257
+ 'confusion_ratio': topic_msgs['shows_confusion'].mean(),
258
+ 'follow_up_ratio': topic_msgs['is_followup'].mean(),
259
+ 'avg_sentiment': topic_msgs['sentiment'].mean(),
260
+ 'engagement_score': 0.0 # Will be calculated below
261
+ }
262
+
263
+ # Calculate engagement score
264
+ metrics['engagement_score'] = (
265
+ metrics['message_count'] * 0.3 +
266
+ metrics['question_ratio'] * 0.25 +
267
+ metrics['follow_up_ratio'] * 0.25 +
268
+ (metrics['avg_sentiment'] + 1) / 2 * 0.2 # Normalize sentiment to 0-1
269
+ )
270
+
271
+ return metrics
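# Note on the engagement_score above: message_count enters the sum raw, so it dominates
# once a topic has more than a handful of messages. With illustrative values
# message_count=10, question_ratio=0.4, follow_up_ratio=0.2, avg_sentiment=0.0:
# 10*0.3 + 0.4*0.25 + 0.2*0.25 + 0.5*0.2 = 3.0 + 0.1 + 0.05 + 0.1 = 3.25,
# i.e. the count term alone outweighs all of the ratio terms combined.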
272
+
273
+ def generate_topic_insights(self, df: pd.DataFrame, topic: str) -> TopicInsights:
274
+ """Generate comprehensive insights for a topic."""
275
+ difficulty = self.analyze_topic_difficulty(df, topic)
276
+ confusion_points = self.identify_confusion_patterns(df, topic)
277
+ question_patterns = self.analyze_question_patterns(df, topic)
278
+ time_distribution = self.analyze_time_distribution(df, topic)
279
+ engagement_metrics = self.calculate_engagement_metrics(df, topic)
280
+
281
+ # Generate recommended focus areas based on insights
282
+ focus_areas = []
283
+
284
+ if difficulty in (TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT):
285
+ focus_areas.append("Fundamental concept reinforcement needed")
286
+
287
+ if confusion_points:
288
+ focus_areas.append(f"Address common confusion around: {', '.join(confusion_points[:3])}")
289
+
290
+ if engagement_metrics['confusion_ratio'] > 0.3:
291
+ focus_areas.append("Consider alternative teaching approaches")
292
+
293
+ if time_distribution.get('time_spent_on_questions', 0) > time_distribution.get('total_time', 0) * 0.5:
294
+ focus_areas.append("More practical examples or demonstrations needed")
295
+
296
+ return TopicInsights(
297
+ difficulty_level=difficulty,
298
+ common_confusion_points=confusion_points,
299
+ question_patterns=question_patterns,
300
+ time_distribution=time_distribution,
301
+ engagement_metrics=engagement_metrics,
302
+ recommended_focus_areas=focus_areas
303
+ )
304
+
305
+ def analyze_student_progress(self, df: pd.DataFrame) -> Dict[str, Any]:
306
+ """Analyze individual student progress and learning patterns."""
307
+ student_progress = {}
308
+
309
+ for student_id in df['user_id'].unique():
310
+ student_msgs = df[df['user_id'] == student_id]
311
+
312
+ # Calculate student-specific metrics
313
+ progress = {
314
+ 'total_messages': len(student_msgs),
315
+ 'questions_asked': student_msgs['is_question'].sum(),
316
+ 'confusion_instances': student_msgs['shows_confusion'].sum(),
317
+ 'avg_sentiment': student_msgs['sentiment'].mean(),
318
+ 'topic_engagement': {},
319
+ 'learning_pattern': self._identify_learning_pattern(student_msgs)
320
+ }
321
+
322
+ # Analyze topic-specific engagement
323
+ topics = self.extract_topic_hierarchies(student_msgs)
324
+ for topic in topics:
325
+ topic_msgs = student_msgs[student_msgs['prompt'].str.contains(topic, case=False)]
326
+ progress['topic_engagement'][topic] = {
327
+ 'message_count': len(topic_msgs),
328
+ 'confusion_rate': topic_msgs['shows_confusion'].mean(),
329
+ 'sentiment_trend': stats.linregress(
330
+ range(len(topic_msgs)),
331
+ topic_msgs['sentiment']
332
+ ).slope
333
+ }
334
+
335
+ student_progress[student_id] = progress
336
+
337
+ return student_progress
338
+
339
+ def _identify_learning_pattern(self, student_msgs: pd.DataFrame) -> str:
340
+ """Identify student's learning pattern based on their interaction style."""
341
+ # Calculate key metrics
342
+ question_ratio = student_msgs['is_question'].mean()
343
+ confusion_ratio = student_msgs['shows_confusion'].mean()
344
+ follow_up_ratio = student_msgs['is_followup'].mean()
345
+ sentiment_trend = stats.linregress(
346
+ range(len(student_msgs)),
347
+ student_msgs['sentiment']
348
+ ).slope
349
+
350
+ # Identify pattern
351
+ if question_ratio > 0.6:
352
+ return "Inquisitive Learner"
353
+ elif confusion_ratio > 0.4:
354
+ return "Needs Additional Support"
355
+ elif follow_up_ratio > 0.5:
356
+ return "Deep Dive Learner"
357
+ elif sentiment_trend > 0:
358
+ return "Progressive Learner"
359
+ else:
360
+ return "Steady Learner"
361
+
362
+ def generate_comprehensive_report(self, chat_history: List[Dict]) -> Dict[str, Any]:
363
+ """Generate a comprehensive analytics report."""
364
+ # Preprocess chat history
365
+ df = self.preprocess_chat_history(chat_history)
366
+
367
+ # Extract topics
368
+ topics = self.extract_topic_hierarchies(df)
369
+
370
+ report = {
371
+ 'topics': {},
372
+ 'student_progress': self.analyze_student_progress(df),
373
+ 'overall_metrics': {
374
+ 'total_conversations': len(df),
375
+ 'unique_students': df['user_id'].nunique(),
376
+ 'avg_sentiment': df['sentiment'].mean(),
377
+ 'most_discussed_topics': Counter(
378
+ topic for topics_list in topics.values()
379
+ for topic in topics_list
380
+ ).most_common(5)
381
+ }
382
+ }
383
+
384
+ # Generate topic-specific insights
385
+ for main_topic, subtopics in topics.items():
386
+ subtopic_insights = {}
387
+ for subtopic in subtopics:
388
+ subtopic_insights[subtopic] = {
389
+ 'insights': self.generate_topic_insights(df, subtopic),
390
+ 'related_topics': [t for t in subtopics if t != subtopic],
391
+ 'student_engagement': {
392
+ student_id: self.calculate_engagement_metrics(
393
+ df[df['user_id'] == student_id],
394
+ subtopic
395
+ )
396
+ for student_id in df['user_id'].unique()
397
+ }
398
+ }
399
+
400
+ report['topics'][main_topic] = {
401
+ 'insights': self.generate_topic_insights(df, main_topic),
402
+ 'subtopics': subtopic_insights,
403
+ 'topic_relationships': {
404
+ 'hierarchy_depth': len(subtopics),
405
+ 'connection_strength': self._calculate_topic_connections(df, main_topic, subtopics),
406
+ 'progression_path': self._identify_topic_progression(df, main_topic, subtopics)
407
+ }
408
+ }
409
+
410
+ # Add temporal analysis
411
+ report['temporal_analysis'] = {
412
+ 'daily_engagement': df.groupby(df['timestamp'].dt.date).agg({
413
+ 'user_id': 'count',
414
+ 'is_question': 'sum',
415
+ 'shows_confusion': 'sum',
416
+ 'sentiment': 'mean'
417
+ }).to_dict(),
418
+ 'peak_activity_hours': df.groupby(df['timestamp'].dt.hour)['user_id'].count().nlargest(3).to_dict(),
419
+ 'learning_trends': self._analyze_learning_trends(df)
420
+ }
421
+
422
+ # Add recommendations
423
+ report['recommendations'] = self._generate_recommendations(report)
424
+
425
+ return report
426
+
427
+ def _calculate_topic_connections(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> Dict[str, float]:
428
+ """Calculate connection strength between topics based on co-occurrence."""
429
+ connections = {}
430
+ main_topic_msgs = df[df['prompt'].str.contains(main_topic, case=False)]
431
+
432
+ for subtopic in subtopics:
433
+ cooccurrence = df[
434
+ df['prompt'].str.contains(main_topic, case=False) &
435
+ df['prompt'].str.contains(subtopic, case=False)
436
+ ].shape[0]
437
+
438
+ connection_strength = cooccurrence / len(main_topic_msgs) if len(main_topic_msgs) > 0 else 0
439
+ connections[subtopic] = connection_strength
440
+
441
+ return connections
442
+
443
+ def _identify_topic_progression(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> List[str]:
444
+ """Identify optimal topic progression path based on student interactions."""
445
+ topic_difficulties = {}
446
+
447
+ for subtopic in subtopics:
448
+ difficulty = self.analyze_topic_difficulty(df, subtopic)
449
+ topic_difficulties[subtopic] = difficulty.value
450
+
451
+ # Sort subtopics by difficulty
452
+ return sorted(subtopics, key=lambda x: topic_difficulties[x])
453
+
454
+ def _analyze_learning_trends(self, df: pd.DataFrame) -> Dict[str, Any]:
455
+ """Analyze overall learning trends across the dataset."""
456
+ return {
457
+ 'sentiment_trend': stats.linregress(
458
+ range(len(df)),
459
+ df['sentiment']
460
+ )._asdict(),
461
+ 'confusion_trend': stats.linregress(
462
+ range(len(df)),
463
+ df['shows_confusion']
464
+ )._asdict(),
465
+ 'engagement_progression': self._calculate_engagement_progression(df)
466
+ }
467
+
468
+ def _calculate_engagement_progression(self, df: pd.DataFrame) -> Dict[str, float]:
469
+ """Calculate how student engagement changes over time."""
470
+ df['week'] = df['timestamp'].dt.isocalendar().week
471
+ weekly_engagement = df.groupby('week').agg({
472
+ 'is_question': 'mean',
473
+ 'shows_confusion': 'mean',
474
+ 'is_followup': 'mean',
475
+ 'sentiment': 'mean'
476
+ })
477
+
478
+ return {
479
+ 'question_trend': stats.linregress(
480
+ range(len(weekly_engagement)),
481
+ weekly_engagement['is_question']
482
+ ).slope,
483
+ 'confusion_trend': stats.linregress(
484
+ range(len(weekly_engagement)),
485
+ weekly_engagement['shows_confusion']
486
+ ).slope,
487
+ 'follow_up_trend': stats.linregress(
488
+ range(len(weekly_engagement)),
489
+ weekly_engagement['is_followup']
490
+ ).slope,
491
+ 'sentiment_trend': stats.linregress(
492
+ range(len(weekly_engagement)),
493
+ weekly_engagement['sentiment']
494
+ ).slope
495
+ }
496
+
497
+ def _generate_recommendations(self, report: Dict[str, Any]) -> List[str]:
498
+ """Generate actionable recommendations based on the analysis."""
499
+ recommendations = []
500
+
501
+ # Analyze difficulty distribution
502
+ difficult_topics = [
503
+ topic for topic, data in report['topics'].items()
504
+ if data['insights'].difficulty_level in
505
+ (TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT)
506
+ ]
507
+
508
+ if difficult_topics:
509
+ recommendations.append(
510
+ f"Consider providing additional resources for challenging topics: {', '.join(difficult_topics)}"
511
+ )
512
+
513
+ # Analyze student engagement
514
+ avg_engagement = np.mean([
515
+ progress['questions_asked'] / progress['total_messages']
516
+ for progress in report['student_progress'].values()
517
+ ])
518
+
519
+ if avg_engagement < 0.3:
520
+ recommendations.append(
521
+ "Implement more interactive elements to increase student engagement"
522
+ )
523
+
524
+ # Analyze temporal patterns
525
+ peak_hours = list(report['temporal_analysis']['peak_activity_hours'].keys())
526
+ recommendations.append(
527
+ f"Consider scheduling additional support during peak activity hours: {peak_hours}"
528
+ )
529
+
530
+ # Analyze learning trends
531
+ # sentiment_trend = report['temporal_analysis']['learning_trends']['sentiment_trend']
532
+ # if sentiment_trend < 0:
533
+ # recommendations.append(
534
+ # "Review teaching approach to address declining student satisfaction"
535
+ # )
536
+ # Analyze learning trends
538
+ sentiment_trend = report.get('temporal_analysis', {}).get('learning_trends', {}).get('sentiment_trend', None)
539
+ if isinstance(sentiment_trend, (int, float)):
540
+ if sentiment_trend < 0:
541
+ recommendations.append(
542
+ "Review teaching approach to address declining student satisfaction"
543
+ )
544
+ elif isinstance(sentiment_trend, dict):
+ # _analyze_learning_trends stores linregress()._asdict(), so the trend is under 'slope'
+ if sentiment_trend.get('slope', 0) < 0:
+ recommendations.append(
+ "Review teaching approach to address declining student satisfaction"
+ )
+ else:
+ print(f"Unexpected type for sentiment_trend: {type(sentiment_trend)}")
549
+
550
+ return recommendations
551
+
552
+ class CustomJSONEncoder(json.JSONEncoder):
553
+ def default(self, obj):
554
+ if isinstance(obj, TopicDifficulty):
555
+ return obj.value
556
+ if isinstance(obj, TopicInsights):
557
+ return obj.to_dict()
558
+ if isinstance(obj, np.integer):
559
+ return int(obj)
560
+ if isinstance(obj, np.floating):
561
+ return float(obj)
562
+ if isinstance(obj, np.ndarray):
563
+ return obj.tolist()
564
+ if isinstance(obj, datetime):
565
+ return obj.isoformat()
566
+ return super().default(obj)
567
+
568
+ def convert_insights_to_dict(report):
569
+ for main_topic, data in report['topics'].items():
570
+ if isinstance(data['insights'], TopicInsights):
571
+ data['insights'] = data['insights'].to_dict()
572
+ for subtopic, subdata in data['subtopics'].items():
573
+ if isinstance(subdata['insights'], TopicInsights):
574
+ subdata['insights'] = subdata['insights'].to_dict()
575
+
576
+ if __name__ == "__main__":
577
+ # Load chat history data
578
+ chat_history = None
579
+ with open('sample_files/chat_history_corpus.json', 'r', encoding="utf-8") as file:
580
+ chat_history = json.load(file)
581
+
582
+ # Initialize analytics system
583
+ analytics = PreClassAnalytics()
584
+
585
+ # Generate comprehensive report
586
+ report = analytics.generate_comprehensive_report(chat_history)
587
+
588
+ # Convert insights to dictionary
589
+ # convert_insights_to_dict(report)
590
+
591
+ print(json.dumps(report, indent=4, cls=CustomJSONEncoder))
592
+ # print(report)
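For completeness, a minimal sketch of persisting the report to disk, assuming the PreClassAnalytics and CustomJSONEncoder classes defined in this file (the output filename is illustrative):

import json

# Same corpus path as the __main__ block above
with open("sample_files/chat_history_corpus.json", "r", encoding="utf-8") as f:
    chat_history = json.load(f)

analytics = PreClassAnalytics()
report = analytics.generate_comprehensive_report(chat_history)

# CustomJSONEncoder handles TopicDifficulty, TopicInsights, numpy values and datetimes
with open("pre_class_analytics_report.json", "w", encoding="utf-8") as f:
    json.dump(report, f, indent=4, cls=CustomJSONEncoder)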
requirements.txt ADDED
@@ -0,0 +1,37 @@
1
+ streamlit
2
+ pymongo
3
+ PyPDF2
4
+ python-docx
5
+ openai
6
+ google-generativeai
7
+ llama-index
8
+ werkzeug
9
+ numpy
10
+ pandas
11
+ plotly
14
+ community
15
+ umap-learn
16
+ seaborn
17
+ matplotlib
18
+ scipy
19
+ Pillow
20
+ python-dotenv
21
+ zoomus
22
+ asyncio
23
+ google-auth-oauthlib
24
+ google-auth
25
+ transformers
26
+ textstat
27
+ spacy
28
+ streamlit_option_menu
29
+ beautifulsoup4
30
+ youtube-transcript-api
31
+ requests
32
+ xml==0.0.1
33
+ networkx==3.1
34
+ bokeh==3.2.1
35
+
36
+ scikit-learn==1.2.2
37
+ langchain==0.0.208
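Note: pre_class_analytics4.py calls spacy.load("en_core_web_lg"), and that model is not covered by the packages above; it typically needs a separate download step, for example:

import spacy.cli
spacy.cli.download("en_core_web_lg")  # equivalent to: python -m spacy download en_core_web_lg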
research22.py ADDED
@@ -0,0 +1,517 @@
1
+ # if __name__ == "__main__":
2
+ # main()
3
+ import streamlit as st
4
+ import google.generativeai as genai
5
+ from typing import Dict, Any
6
+ import PyPDF2
7
+ import io
8
+ from pymongo import MongoClient
9
+ from dotenv import load_dotenv
10
+ import os
11
+ import json
12
+ import re
13
+
14
+ # --------------------------------------------------------------------------------
15
+ # 1. Environment Setup
16
+ # --------------------------------------------------------------------------------
17
+ load_dotenv()
18
+ # MongoDB
19
+ MONGODB_URI = os.getenv(
20
+ "MONGODB_UR",
21
+ "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
22
+ )
23
+ # Gemini
24
+ GEMINI_KEY = os.getenv("GEMINI_KEY", "AIzaSyCFIvntck54HOCS5pxxiy9wpr5HJN3r02I")
25
+
26
+ # Configure Gemini
27
+ genai.configure(api_key=GEMINI_KEY)
28
+
29
+
30
+ # --------------------------------------------------------------------------------
31
+ # 2. Database Connection
32
+ # --------------------------------------------------------------------------------
33
+ def create_db_connection():
34
+ """
35
+ Create a MongoDB connection and return the 'novascholar_db' database handle.
36
+ """
37
+ try:
38
+ client = MongoClient(MONGODB_URI)
39
+ db = client["novascholar_db"] # Database name
40
+ collection = db["research_papers"] # Collection name
41
+ # Ping to confirm connection
42
+ client.admin.command("ping")
43
+ return db
44
+ except Exception as e:
45
+ st.error(f"Database connection error: {str(e)}")
46
+ return None
47
+
48
+
49
+ # --------------------------------------------------------------------------------
50
+ # 3. PDF Text Extraction
51
+ # --------------------------------------------------------------------------------
52
+ def extract_text_from_pdf(pdf_file) -> str:
53
+ """
54
+ Extract all text from a PDF.
55
+ """
56
+ try:
57
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
58
+ text = ""
59
+ for page in pdf_reader.pages:
60
+ text += page.extract_text() + "\n"
61
+ return text
62
+ except Exception as e:
63
+ st.error(f"Error processing PDF: {str(e)}")
64
+ return ""
65
+
66
+
67
+ # --------------------------------------------------------------------------------
68
+ # 4. Gemini Response Helper
69
+ # --------------------------------------------------------------------------------
70
+ def get_gemini_response(prompt: str) -> str:
71
+ """
72
+ Sends a prompt to Google's Gemini model and returns the response text.
73
+ Adjust this function as needed for your generative AI usage.
74
+ """
75
+ try:
76
+ model = genai.GenerativeModel("gemini-pro")
77
+ response = model.generate_content(prompt)
78
+ return response.text
79
+ except Exception as e:
80
+ st.error(f"Gemini API Error: {str(e)}")
81
+ return ""
82
+
83
+
84
+ # --------------------------------------------------------------------------------
85
+ # 5. Basic Info Extraction
86
+ # --------------------------------------------------------------------------------
87
+ def extract_basic_info(text: str) -> Dict[str, str]:
88
+ """
89
+ Extract title, publication, journal/conference, abstract, keywords, author, and date from the paper text.
90
+ Return a dictionary with these fields.
91
+ """
92
+ prompt = f"""
93
+ Extract the following fields from the research paper text below:
94
+
95
+ Title
96
+ Publication
97
+ Journal_Conference
98
+ Abstract
99
+ Keywords
100
+ Author
101
+ Date_of_Publication
102
+
103
+ Paper text:
104
+ {text}
105
+
106
+ Return them in this format:
107
+ Title: ...
108
+ Publication: ...
109
+ Journal_Conference: ...
110
+ Abstract: ...
111
+ Keywords: ...
112
+ Author: ...
113
+ Date_of_Publication: ...
114
+ """
115
+ response = get_gemini_response(prompt)
116
+ if not response:
117
+ return {}
118
+ info = {}
119
+ lines = response.split("\n")
120
+ for line in lines:
121
+ if ":" in line:
122
+ key, value = line.split(":", 1)
123
+ info[key.strip()] = value.strip()
124
+ return info
125
+
126
+
127
+ # --------------------------------------------------------------------------------
128
+ # 6. Content Sections Extraction
129
+ # --------------------------------------------------------------------------------
130
+ def extract_content_sections(text: str) -> Dict[str, str]:
131
+ """
132
+ Extract expanded sections: Intro, Literature_Review, Research_Models_Used,
133
+ Methodology, Discussion, Future_Scope, Theory.
134
+ """
135
+ prompt = f"""Please extract these sections from the research paper:
136
+ 1. Introduction
137
+ 2. Literature Review
138
+ 3. Research Models Used
139
+ 4. Methodology
140
+ 5. Discussion
141
+ 6. Future Scope
142
+ 7. Theory
143
+
144
+ Paper text: {text}
145
+
146
+ Return in this exact format without any additional text or explanations. Also make sure
147
+ that no field is empty (each should contain at least 10-15 meaningful words):
148
+ Intro: <text>
149
+ Literature_Review: <text>
150
+ Research_Models_Used: <text>
151
+ Methodology: <text>
152
+ Discussion: <text>
153
+ Future_Scope: <text>
154
+ Theory: <text>
155
+ """
156
+ response = get_gemini_response(prompt)
157
+ if not response:
158
+ return {}
159
+ sections = {}
160
+ lines = response.split("\n")
161
+ for line in lines:
162
+ if ":" in line:
163
+ key, value = line.split(":", 1)
164
+ sections[key.strip()] = value.strip()
165
+ return sections
166
+
167
+
168
+ # --------------------------------------------------------------------------------
169
+ # 7. Variables Extraction
170
+ # --------------------------------------------------------------------------------
171
+ def extract_variables(text: str) -> Dict[str, Any]:
172
+ """
173
+ Extract variable data: Independent_Variables, nof_Independent_Variables,
174
+ Dependent_Variables, nof_Dependent_Variables, Control_Variables,
175
+ Extraneous_Variables, nof_Control_Variables, nof_Extraneous_Variables
176
+ """
177
+ prompt = f"""From the paper text, extract the following fields:
178
+ 1. Independent_Variables
179
+ 2. nof_Independent_Variables
180
+ 3. Dependent_Variables
181
+ 4. nof_Dependent_Variables
182
+ 5. Control_Variables
183
+ 6. Extraneous_Variables
184
+ 7. nof_Control_Variables
185
+ 8. nof_Extraneous_Variables
186
+
187
+ Return them in this format:
188
+ Independent_Variables: <list>
189
+ nof_Independent_Variables: <integer>
190
+ Dependent_Variables: <list>
191
+ nof_Dependent_Variables: <integer>
192
+ Control_Variables: <list>
193
+ Extraneous_Variables: <list>
194
+ nof_Control_Variables: <integer>
195
+ nof_Extraneous_Variables: <integer>
196
+
197
+ Paper text: {text}
198
+ """
199
+ response = get_gemini_response(prompt)
200
+ if not response:
201
+ return {}
202
+ variables = {}
203
+ lines = response.split("\n")
204
+ for line in lines:
205
+ if ":" in line:
206
+ key, value = line.split(":", 1)
207
+ # Attempt to convert to integer where appropriate
208
+ clean_key = key.strip()
209
+ clean_value = value.strip()
210
+ if clean_key.startswith("nof_"):
211
+ try:
212
+ variables[clean_key] = int(clean_value)
213
+ except ValueError:
214
+ # fallback if it's not an integer
215
+ variables[clean_key] = 0
216
+ else:
217
+ variables[clean_key] = clean_value
218
+ return variables
219
+
220
+
221
+ # --------------------------------------------------------------------------------
222
+ # 8. Utility to ensure no empty fields (example logic)
223
+ # --------------------------------------------------------------------------------
224
+ def ensure_non_empty_values(data: Dict[str, Any], fallback_text: str) -> Dict[str, Any]:
225
+ """
226
+ Ensure each extracted field has meaningful content. If empty, fill with default text.
227
+ """
228
+ for k, v in data.items():
229
+ if not v or len(str(v).split()) < 3: # example check for minimal words
230
+ data[k] = f"No sufficient data found for {k}. Could not parse."
231
+ return data
232
+
233
+
234
+ # --------------------------------------------------------------------------------
235
+ # 9. Processing the Paper
236
+ # --------------------------------------------------------------------------------
237
+ # def process_paper(text: str) -> Dict[str, Any]:
238
+ # """
239
+ # Orchestrate calls to extract basic info, content sections, and variables.
240
+ # Return a dictionary containing all the fields with consistent naming.
241
+ # """
242
+ # with st.spinner("Extracting basic information..."):
243
+ # basic_info = extract_basic_info(text)
244
+ # basic_info = ensure_non_empty_values(basic_info, text)
245
+
246
+ # with st.spinner("Extracting content sections..."):
247
+ # content_sections = extract_content_sections(text)
248
+ # content_sections = ensure_non_empty_values(content_sections, text)
249
+
250
+ # with st.spinner("Extracting variables..."):
251
+ # variables_info = extract_variables(text)
252
+ # variables_info = ensure_non_empty_values(variables_info, text)
253
+
254
+ # # Create a single dictionary with all fields
255
+ # paper_doc = {
256
+ # "Title": basic_info.get("Title", ""),
257
+ # "Publication": basic_info.get("Publication", ""),
258
+ # "Journal_Conference": basic_info.get("Journal_Conference", ""),
259
+ # "Abstract": basic_info.get("Abstract", ""),
260
+ # "Keywords": basic_info.get("Keywords", ""),
261
+ # "Author": basic_info.get("Author", ""),
262
+ # "Date_of_Publication": basic_info.get("Date_of_Publication", ""),
263
+ # "Intro": content_sections.get("Intro", ""),
264
+ # "Literature_Review": content_sections.get("Literature_Review", ""),
265
+ # "Research_Models_Used": content_sections.get("Research_Models_Used", ""),
266
+ # "Methodology": content_sections.get("Methodology", ""),
267
+ # "Discussion": content_sections.get("Discussion", ""),
268
+ # "Future_Scope": content_sections.get("Future_Scope", ""),
269
+ # "Theory": content_sections.get("Theory", ""),
270
+ # "Independent_Variables": variables_info.get("Independent_Variables", ""),
271
+ # "nof_Independent_Variables": variables_info.get("nof_Independent_Variables", 0),
272
+ # "Dependent_Variables": variables_info.get("Dependent_Variables", ""),
273
+ # "nof_Dependent_Variables": variables_info.get("nof_Dependent_Variables", 0),
274
+ # "Control_Variables": variables_info.get("Control_Variables", ""),
275
+ # "Extraneous_Variables": variables_info.get("Extraneous_Variables", ""),
276
+ # "nof_Control_Variables": variables_info.get("nof_Control_Variables", 0),
277
+ # "nof_Extraneous_Variables": variables_info.get("nof_Extraneous_Variables", 0),
278
+ # }
279
+
280
+ # return paper_doc
281
+
282
+ # filepath: /c:/Users/acer/OneDrive/Documents/GitHub/res-cor/research22.py
283
+ # ...existing code continues...
284
+
285
+ # --------------------------------------------------------------------------------
286
+ # 3. Paper Type Attributes
287
+ # --------------------------------------------------------------------------------
288
+ PAPER_TYPE_ATTRIBUTES = {
289
+ "Review Based Paper": [
290
+ "Title",
291
+ "Publication",
292
+ "Journal_Conference",
293
+ "Abstract",
294
+ "Keywords",
295
+ "Author",
296
+ "Date_of_Publication",
297
+ "Intro",
298
+ "Literature_Review",
299
+ "Body",
300
+ "Protocol",
301
+ "Search String",
302
+ "Included Studies",
303
+ "Data Collection and Analysis Methods",
304
+ "Data Extraction Table",
305
+ "Synthesis and Analysis",
306
+ "Conclusion",
307
+ "Limitations",
308
+ "Results",
309
+ "References",
310
+ "Risk of Bias Assessment",
311
+ ],
312
+ "Opinion/Perspective Based Paper": [
313
+ "Title",
314
+ "Publication",
315
+ "Journal_Conference",
316
+ "Abstract",
317
+ "Keywords",
318
+ "Author",
319
+ "Date_of_Publication",
320
+ "Intro",
321
+ "Literature_Review",
322
+ "Introduction",
323
+ "Body",
324
+ "Results and Discussion",
325
+ "Conclusion",
326
+ "References",
327
+ ],
328
+ "Empirical Research Paper": [
329
+ "Title",
330
+ "Publication",
331
+ "Journal_Conference",
332
+ "Abstract",
333
+ "Keywords",
334
+ "Author",
335
+ "Date_of_Publication",
336
+ "Intro",
337
+ "Literature_Review",
338
+ "Introduction",
339
+ "Body",
340
+ "Methodology",
341
+ "Participants",
342
+ "Survey Instrument",
343
+ "Data Collection",
344
+ "Data Analysis",
345
+ "Results and Discussion",
346
+ "Conclusion",
347
+ "References",
348
+ ],
349
+ "Research Paper (Other)": [
350
+ "Title",
351
+ "Publication",
352
+ "Journal_Conference",
353
+ "Abstract",
354
+ "Keywords",
355
+ "Author",
356
+ "Date_of_Publication",
357
+ "Intro",
358
+ "Literature_Review",
359
+ "Research_Models_Used",
360
+ "Methodology",
361
+ "Discussion",
362
+ "Future_Scope",
363
+ "Theory",
364
+ "Independent_Variables",
365
+ "nof_Independent_Variables",
366
+ "Dependent_Variables",
367
+ "nof_Dependent_Variables",
368
+ "Control_Variables",
369
+ "Extraneous_Variables",
370
+ "nof_Control_Variables",
371
+ "nof_Extraneous_Variables",
372
+ ],
373
+ }
374
+
375
+
376
+ # --------------------------------------------------------------------------------
377
+ # 4. Extract Paper Fields
378
+ # --------------------------------------------------------------------------------
379
+ def extract_paper_fields(text: str, paper_type: str) -> Dict[str, Any]:
380
+ """
381
+ Use Gemini to extract fields based on the paper type attributes,
382
+ then return a dictionary of extracted fields.
383
+ """
384
+ if paper_type not in PAPER_TYPE_ATTRIBUTES:
385
+ st.error("Invalid paper type selected.")
386
+ return {}
387
+
388
+ selected_attrs = PAPER_TYPE_ATTRIBUTES[paper_type]
389
+ prompt = f"""
390
+ Extract the following fields from the research paper text below:
391
+
392
+ {", ".join(selected_attrs)}
393
+
394
+ Paper text:
395
+ {text}
396
+
397
+ Return them in this JSON format strictly, with no extra text:
398
+ [
399
+ {{
400
+ {", ".join([f'"{attr}": "value"' for attr in selected_attrs])}
401
+ }}
402
+ ]
403
+ """
404
+
405
+ try:
406
+ response = get_gemini_response(prompt)
407
+ if not response:
408
+ st.error("No response from Gemini.")
409
+ return {}
410
+
411
+ # Clean up any text around JSON
413
+ raw_text = response.strip()
414
+
415
+ # Find start and end of JSON
416
+ json_start = raw_text.find("[")
417
+ json_end = raw_text.rfind("]") + 1
418
+ json_str = raw_text[json_start:json_end]
419
+
420
+ # Try removing trailing commas, extra quotes, etc.
421
+ json_str = re.sub(r",\s*}", "}", json_str)
422
+ json_str = re.sub(r",\s*\]", "]", json_str)
423
+
424
+ try:
425
+ data = json.loads(json_str)
426
+ except json.JSONDecodeError as e:
427
+ st.warning(f"Fixing JSON errors: {str(e)}")
428
+ # As a last-resort attempt, remove anything after the last curly bracket
429
+ bracket_pos = json_str.rfind("}")
430
+ if bracket_pos != -1:
431
+ json_str = json_str[: bracket_pos + 1]
432
+ # Try again
433
+ data = json.loads(json_str)
434
+
435
+ if isinstance(data, list) and len(data) > 0:
436
+ return data[0]
437
+ else:
438
+ st.error("Gemini did not return a valid JSON array.")
439
+ return {}
440
+ except Exception as e:
441
+ st.error(f"Error in Gemini extraction: {str(e)}")
442
+ return {}
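# Quick illustration of the clean-up path above on a hypothetical malformed reply
# (not real model output): surrounding prose is stripped and trailing commas removed.
#
#   raw = 'Sure! [{"Title": "X", "Abstract": "Y",}] Hope this helps.'
#   json_str = raw[raw.find("["):raw.rfind("]") + 1]   # '[{"Title": "X", "Abstract": "Y",}]'
#   json_str = re.sub(r",\s*}", "}", json_str)          # drop trailing comma before }
#   json_str = re.sub(r",\s*\]", "]", json_str)         # drop trailing comma before ]
#   json.loads(json_str)                                # [{'Title': 'X', 'Abstract': 'Y'}]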
443
+
444
+
445
+ # --------------------------------------------------------------------------------
446
+ # 5. Process Paper and Save
447
+ # --------------------------------------------------------------------------------
448
+ def process_paper(text: str, paper_type: str):
449
+ """
450
+ Extract paper fields based on paper type, then save to
451
+ the corresponding MongoDB collection.
452
+ """
453
+ db = create_db_connection()
454
+ if not db:
455
+ return
456
+
457
+ # Determine collection name
458
+ collection_name = paper_type.replace(" ", "_").lower()
459
+ collection = db[collection_name]
460
+
461
+ # Extract fields
462
+ extracted_data = extract_paper_fields(text, paper_type)
463
+ if extracted_data:
464
+ # Insert into MongoDB
465
+ collection.insert_one(extracted_data)
466
+ return extracted_data
467
+ return {}
468
+
469
+
470
+ # --------------------------------------------------------------------------------
471
+ # 6. Streamlit UI for Paper Extraction
472
+ # --------------------------------------------------------------------------------
473
+ def main():
474
+ # st.set_page_config(page_title="Extract Research Paper", layout="wide")
475
+ st.title("Extract Research Paper")
476
+
477
+ paper_type = st.selectbox(
478
+ "Select type of research paper:",
479
+ [
480
+ "Review Based Paper",
481
+ "Opinion/Perspective Based Paper",
482
+ "Empirical Research Paper",
483
+ "Research Paper (Other)",
484
+ ],
485
+ )
486
+
487
+ uploaded_file = st.file_uploader("Upload a PDF or text file", type=["pdf", "txt"])
488
+
489
+ if st.button("Extract & Save") and uploaded_file:
490
+ try:
491
+ # Read file content
492
+ if uploaded_file.type == "application/pdf":
493
+ pdf_reader = PyPDF2.PdfReader(uploaded_file)
494
+ text_content = ""
495
+ for page in pdf_reader.pages:
496
+ text_content += page.extract_text()
497
+ else:
498
+ text_content = uploaded_file.read().decode("utf-8", errors="replace")
499
+
500
+ with st.spinner("Extracting fields..."):
501
+ data = process_paper(text_content, paper_type)
502
+
503
+ if data:
504
+ st.success(
505
+ f"Paper extracted and saved to MongoDB in '{paper_type}' collection!"
506
+ )
507
+ st.write("Extracted fields:")
508
+ st.json(data)
509
+
510
+ except Exception as e:
511
+ st.error(f"An error occurred: {str(e)}")
512
+
513
+
514
+ # ...existing code (if any)...
515
+
516
+ if __name__ == "__main__":
517
+ main()
research3.py ADDED
@@ -0,0 +1,110 @@
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import requests
4
+ import json
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
11
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
12
+
13
+
14
+ def call_perplexity_api(prompt: str) -> str:
15
+ """Call Perplexity AI with a prompt, return the text response if successful."""
16
+ headers = {
17
+ "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
18
+ "Content-Type": "application/json",
19
+ }
20
+ payload = {
21
+ "model": "llama-3.1-sonar-small-128k-chat",
22
+ "messages": [{"role": "user", "content": prompt}],
23
+ "temperature": 0.3,
24
+ }
25
+
26
+ try:
27
+ response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
28
+ response.raise_for_status()
29
+ return response.json()["choices"][0]["message"]["content"]
30
+ except Exception as e:
31
+ st.error(f"API Error: {str(e)}")
32
+ return ""
33
+
34
+
35
+ def generate_research_paper(df: pd.DataFrame, topic: str) -> dict:
36
+ """
37
+ For each column in the DataFrame, generate a research paper section (100-150 words)
38
+ that addresses the data in that column on the given topic. Return a dict: column -> text.
39
+ """
40
+ paper_sections = {}
41
+ for col in df.columns:
42
+ # Convert all non-null rows in the column to strings and join them for context
43
+ col_values = df[col].dropna().astype(str).tolist()
44
+ # We'll truncate if there's a ton of text
45
+ sample_text = " | ".join(col_values[:50]) # limit to first 50 rows for brevity
46
+
47
+ prompt = f"""
48
+ Topic: {topic}
49
+ Column: {col}
50
+ Data Samples: {sample_text}
51
+
52
+ Generate a well-structured research paper section that addresses the topic above,
53
+ referencing relevant information from the column data.
54
+ The section should be at least 100 words and at most 150 words.
55
+ Provide insights, examples, and possible research directions integrating the corpus data.
56
+ """
57
+ section_text = call_perplexity_api(prompt)
58
+ paper_sections[col] = section_text.strip() if section_text else ""
59
+ return paper_sections
60
+
61
+
62
+ def format_paper(paper_dict: dict, topic: str) -> str:
63
+ """
64
+ Format the generated paper into a Markdown string.
65
+ Add the topic as the main title, each column name as a heading, and
66
+ the corresponding text as paragraph content.
67
+ """
68
+ md_text = f"# Research Paper on: {topic}\n\n"
69
+ for col, content in paper_dict.items():
70
+ md_text += f"## {col}\n{content}\n\n"
71
+ return md_text
72
+
73
+
74
+ def main():
75
+ st.title("Topic + Corpus-Based Research Paper Generator")
76
+
77
+ topic_input = st.text_input("Enter the topic for the research paper:")
78
+ uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
79
+
80
+ if uploaded_file:
81
+ df = pd.read_csv(uploaded_file)
82
+ st.write("### Preview of Uploaded Data")
83
+ st.dataframe(df.head())
84
+
85
+ if st.button("Generate Research Paper"):
86
+ if topic_input.strip():
87
+ st.info("Generating paper based on the topic and the corpus columns...")
88
+ with st.spinner("Calling Perplexity AI..."):
89
+ paper = generate_research_paper(df, topic_input)
90
+ if paper:
91
+ formatted_paper = format_paper(paper, topic_input)
92
+ st.success("Research Paper Generated Successfully!")
93
+ st.write(formatted_paper)
94
+
95
+ st.download_button(
96
+ label="Download Paper as Markdown",
97
+ data=formatted_paper,
98
+ file_name="research_paper.md",
99
+ mime="text/markdown",
100
+ )
101
+ else:
102
+ st.error(
103
+ "Paper generation failed. Please check Perplexity API key."
104
+ )
105
+ else:
106
+ st.warning("Please enter a valid topic.")
107
+
108
+
109
+ if __name__ == "__main__":
110
+ main()
research_assistant_dashboard.py ADDED
@@ -0,0 +1,349 @@
1
+ # import streamlit as st
2
+ # from openai import OpenAI
3
+ # import os
4
+ # from dotenv import load_dotenv
5
+ # from llama_index.core import (
6
+ # VectorStoreIndex,
7
+ # SimpleDirectoryReader,
8
+ # Document,
9
+ # GPTVectorStoreIndex,
10
+ # )
11
+ # from bson import ObjectId
12
+ # import requests
13
+ # import openai
14
+ # import numpy as np
15
+ # from pymongo import MongoClient
16
+ # from bson import ObjectId
17
+ # from datetime import datetime
18
+ # from llama_index.embeddings.openai import OpenAIEmbedding
19
+ # from typing import List, Dict
20
+
21
+ # # Initialize Perplexity API and OpenAI API
22
+ # load_dotenv()
23
+ # perplexity_api_key = os.getenv("PERPLEXITY_KEY")
24
+ # openai.api_key = os.getenv("OPENAI_KEY")
25
+
26
+ # # MongoDB setup
27
+ # MONGO_URI = os.getenv("MONGO_URI")
28
+ # client = MongoClient(MONGO_URI)
29
+ # db = client["novascholar_db"]
30
+ # research_papers_collection = db["research_papers"]
31
+
32
+
33
+ # def fetch_perplexity_data(api_key, topic):
34
+ # """
35
+ # Fetch research papers data from Perplexity API with proper formatting
36
+ # """
37
+ # headers = {
38
+ # "accept": "application/json",
39
+ # "content-type": "application/json",
40
+ # "authorization": f"Bearer {api_key}",
41
+ # }
42
+
43
+ # # Structured prompt to get properly formatted response
44
+ # messages = [
45
+ # {
46
+ # "role": "system",
47
+ # "content": """You are a research paper retrieval expert. For the given topic, return exactly 10 research papers in the following format:
48
+ # Title: Paper Title
49
+ # Authors: Author 1, Author 2
50
+ # Year: YYYY
51
+ # Content: Detailed paper content with abstract and key findings
52
+ # URL: DOI or paper URL
53
+ # """,
54
+ # },
55
+ # {"role": "user", "content": f"Find 10 research papers about: {topic}"},
56
+ # ]
57
+
58
+ # try:
59
+ # client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
60
+ # response = client.chat.completions.create(
61
+ # model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model
62
+ # messages=messages,
63
+ # )
64
+
65
+ # # Extract and validate response
66
+ # content = response.choices[0].message.content
67
+ # st.write("Fetched Data:", content) # Debugging line to check the fetched data
68
+
69
+ # return content
70
+
71
+ # except Exception as e:
72
+ # st.error(f"Failed to fetch data from Perplexity API: {str(e)}")
73
+ # return ""
74
+
75
+
76
+ # def split_and_vectorize_papers(content: str) -> List[Dict]:
77
+ # """Split and vectorize papers using OpenAI embeddings"""
78
+ # papers = content.split("\n\n")
79
+
80
+ # # Initialize OpenAI client
81
+ # # client = OpenAI() # Uses api_key from environment variable
82
+ # vectors = []
83
+
84
+ # for paper in papers:
85
+ # try:
86
+ # # Get embedding using OpenAI's API directly
87
+ # response = openai.embeddings.create(
88
+ # model="text-embedding-ada-002", input=paper, encoding_format="float"
89
+ # )
90
+
91
+ # # Extract embedding from response
92
+ # embedding = response.data[0].embedding
93
+
94
+ # vectors.append(
95
+ # {"content": paper, "vector": embedding, "timestamp": datetime.utcnow()}
96
+ # )
97
+
98
+ # except Exception as e:
99
+ # st.error(f"Error vectorizing paper: {str(e)}")
100
+ # continue
101
+
102
+ # return vectors
103
+
104
+
105
+ # def store_papers_in_mongodb(papers):
106
+ # """Store papers with vectors in MongoDB"""
107
+ # try:
108
+ # for paper in papers:
109
+ # # Prepare MongoDB document
110
+ # mongo_doc = {
111
+ # "content": paper["content"],
112
+ # "vector": paper["vector"],
113
+ # "created_at": datetime.utcnow(),
114
+ # }
115
+
116
+ # # Insert into MongoDB
117
+ # db.papers.update_one(
118
+ # {"content": paper["content"]}, {"$set": mongo_doc}, upsert=True
119
+ # )
120
+
121
+ # st.success(f"Stored {len(papers)} papers in database")
122
+ # return True
123
+ # except Exception as e:
124
+ # st.error(f"Error storing papers: {str(e)}")
125
+
126
+
127
+ # def get_research_papers(query):
128
+ # """
129
+ # Get and store research papers with improved error handling
130
+ # """
131
+ # # Fetch papers from Perplexity
132
+ # content = fetch_perplexity_data(perplexity_api_key, query)
133
+
134
+ # if not content:
135
+ # return []
136
+
137
+ # # Split and vectorize papers
138
+ # papers = split_and_vectorize_papers(content)
139
+
140
+ # # Store papers in MongoDB
141
+ # if store_papers_in_mongodb(papers):
142
+ # return papers
143
+ # else:
144
+ # st.warning("Failed to store papers in database, but returning fetched results")
145
+ # return papers
146
+
147
+
148
+ # def analyze_research_gaps(papers):
149
+ # """
150
+ # Analyze research gaps with improved prompt and error handling
151
+ # """
152
+ # if not papers:
153
+ # return "No papers provided for analysis"
154
+
155
+ # # Prepare paper summaries for analysis
156
+ # paper_summaries = "\n\n".join(
157
+ # [
158
+ # f"Key Findings: {paper['content'][:500]}..."
159
+ # # f"Title: {paper['title']}\nYear: {paper['year']}\nKey Findings: {paper['content'][:500]}..."
160
+ # for paper in papers
161
+ # ]
162
+ # )
163
+
164
+ # headers = {
165
+ # "Authorization": f"Bearer {perplexity_api_key}",
166
+ # "Content-Type": "application/json",
167
+ # }
168
+
169
+ # data = {
170
+ # "messages": [
171
+ # {
172
+ # "role": "system",
173
+ # "content": "You are a research analysis expert. Identify specific research gaps and future research directions based on the provided papers. Format your response with clear sections: Current State, Identified Gaps, and Future Directions.",
174
+ # },
175
+ # {
176
+ # "role": "user",
177
+ # "content": f"Analyze these papers and identify research gaps:\n\n{paper_summaries}",
178
+ # },
179
+ # ]
180
+ # }
181
+
182
+ # try:
183
+ # client = OpenAI(
184
+ # api_key=perplexity_api_key, base_url="https://api.perplexity.ai"
185
+ # )
186
+ # response = client.chat.completions.create(
187
+ # model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model
188
+ # messages=data["messages"],
189
+ # )
190
+ # return response.choices[0].message.content
191
+
192
+ # except Exception as e:
193
+ # st.error(f"Failed to analyze research gaps: {str(e)}")
194
+ # return "Error analyzing research gaps"
195
+
196
+
197
+ # def create_research_paper(gaps, topic, papers):
198
+ # """
199
+ # Create a research paper that addresses the identified gaps using Perplexity API
200
+ # """
201
+ # full_texts = "\n\n".join([paper["content"] for paper in papers])
202
+ # headers = {
203
+ # "Authorization": f"Bearer {perplexity_api_key}",
204
+ # "Content-Type": "application/json",
205
+ # }
206
+ # data = {
207
+ # "messages": [
208
+ # {
209
+ # "role": "system",
210
+ # "content": "You are a research paper generation expert. Create a comprehensive research paper that addresses the identified gaps based on the provided papers. Format your response with clear sections: Introduction, Literature Review, Methodology, Results, Discussion, Conclusion, and References.",
211
+ # },
212
+ # {
213
+ # "role": "user",
214
+ # "content": f"Create a research paper on the topic '{topic}' that addresses the following research gaps:\n\n{gaps}\n\nBased on the following papers:\n\n{full_texts}",
215
+ # },
216
+ # ]
217
+ # }
218
+ # try:
219
+ # client = OpenAI(
220
+ # api_key=perplexity_api_key, base_url="https://api.perplexity.ai"
221
+ # )
222
+ # response = client.chat.completions.create(
223
+ # model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model
224
+ # messages=data["messages"],
225
+ # )
226
+ # return response.choices[0].message.content
227
+
228
+ # except Exception as e:
229
+ # st.error(f"Failed to create research paper: {str(e)}")
230
+ # return "Error creating research paper"
231
+
232
+
233
+ # def cosine_similarity(vec1, vec2):
234
+ # """Calculate the cosine similarity between two vectors"""
235
+ # vec1 = np.array(vec1)
236
+ # vec2 = np.array(vec2)
237
+ # return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
238
+
239
+
240
+ # def calculate_cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
241
+ # """Calculate cosine similarity between two vectors"""
242
+ # return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
243
+
244
+
245
+ # def display_research_assistant_dashboard():
246
+ # """Display research assistant dashboard"""
247
+ # # Initialize session state for recommendations
248
+ # if "recommendations" not in st.session_state:
249
+ # st.session_state.recommendations = None
250
+ # if "vectors" not in st.session_state:
251
+ # st.session_state.vectors = None
252
+ # if "generated_paper" not in st.session_state:
253
+ # st.session_state.generated_paper = None
254
+
255
+ # # Sidebar
256
+ # with st.sidebar:
257
+ # st.title(f"Welcome, {st.session_state.username}")
258
+ # if st.button("Logout", use_container_width=True):
259
+ # for key in st.session_state.keys():
260
+ # del st.session_state[key]
261
+ # st.rerun()
262
+
263
+ # # Main content
264
+ # st.title("Research Paper Recommendations")
265
+ # search_query = st.text_input("Enter research topic:")
266
+ # col1, col2 = st.columns(2)
267
+ # with col1:
268
+ # if st.button("Get Research Papers"):
269
+ # if search_query:
270
+ # with st.spinner("Fetching recommendations..."):
271
+ # st.session_state.recommendations = get_research_papers(search_query)
272
+ # st.session_state.vectors = [
273
+ # paper["vector"] for paper in st.session_state.recommendations
274
+ # ]
275
+ # st.markdown(
276
+ # "\n\n".join(
277
+ # [
278
+ # f"**{i+1}.**\n{paper['content']}"
279
+ # # f"**{i+1}. {paper['title']}**\n{paper['content']}"
280
+ # for i, paper in enumerate(
281
+ # st.session_state.recommendations
282
+ # )
283
+ # ]
284
+ # )
285
+ # )
286
+ # else:
287
+ # st.warning("Please enter a search query")
288
+ # with col2:
289
+ # if st.button("Analyze Research Gaps"):
290
+ # if st.session_state.recommendations:
291
+ # with st.spinner("Analyzing research gaps..."):
292
+ # gaps = analyze_research_gaps(st.session_state.recommendations)
293
+ # st.session_state.generated_paper = create_research_paper(
294
+ # gaps, search_query, st.session_state.recommendations
295
+ # )
296
+ # st.markdown("### Potential Research Gaps")
297
+ # st.markdown(gaps)
298
+ # else:
299
+ # st.warning("Please get research papers first")
300
+
301
+ # if st.button("Save and Vectorize"):
302
+ # if st.session_state.generated_paper:
303
+ # try:
304
+ # # Initialize OpenAI client
305
+
306
+ # # Get embedding for generated paper
307
+ # response = openai.embeddings.create(
308
+ # model="text-embedding-ada-002",
309
+ # input=st.session_state.generated_paper,
310
+ # encoding_format="float",
311
+ # )
312
+ # generated_vector = response.data[0].embedding
313
+
314
+ # # Calculate similarities with stored vectors
315
+ # similarities = [
316
+ # calculate_cosine_similarity(generated_vector, paper_vector)
317
+ # for paper_vector in st.session_state.vectors
318
+ # ]
319
+
320
+ # # Display results
321
+ # st.markdown("### Generated Research Paper")
322
+ # st.markdown(st.session_state.generated_paper)
323
+
324
+ # st.markdown("### Cosine Similarities with Original Papers")
325
+ # for i, similarity in enumerate(similarities):
326
+ # st.metric(
327
+ # f"Paper {i+1}",
328
+ # value=f"{similarity:.3f}",
329
+ # help="Cosine similarity (1.0 = identical, 0.0 = completely different)",
330
+ # )
331
+
332
+ # except Exception as e:
333
+ # st.error(f"Error during vectorization: {str(e)}")
334
+ # else:
335
+ # st.warning("Please analyze research gaps first")
336
+
337
+
338
+ # # Run the dashboard
339
+ # if __name__ == "__main__":
340
+ # display_research_assistant_dashboard()
341
+
342
+ import research_combine2
343
+ # if __name__ == "__main__":
344
+ # display_research_assistant_dashboard()
345
+ def display_research_assistant_dashboard():
346
+ research_combine2.display_research_assistant_dashboard()
347
+
348
+
349
+
research_combine.py ADDED
@@ -0,0 +1,188 @@
1
+ import streamlit as st
2
+ import os
3
+ import json
4
+ import requests
5
+ from dotenv import load_dotenv
6
+ from pymongo import MongoClient
7
+ from typing import Dict, Any
8
+
9
+ # Load environment variables
10
+ load_dotenv()
11
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
12
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
13
+ MONGODB_URI = os.getenv(
14
+     "MONGODB_URI",
15
+ "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
16
+ )
17
+
18
+ # MongoDB setup
19
+ client = MongoClient(MONGODB_URI)
20
+ db = client["novascholar_db"]
21
+ collection = db["research_papers"]
22
+
23
+
24
+ def search_papers(topic: str, num_papers: int) -> str:
25
+ headers = {
26
+ "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
27
+ "Content-Type": "application/json",
28
+ }
29
+
30
+ prompt = f"""Find {num_papers} recent research papers about {topic}.
31
+ Return ONLY a valid JSON array with the following structure for each paper, no additional text:
32
+ [
33
+ {{
34
+ "Title": "paper title",
35
+ "Publication": "publication name",
36
+ "Journal_Conference": "venue name",
37
+ "Abstract": "abstract text",
38
+ "Keywords": "key terms",
39
+ "Author": "author names",
40
+ "Date_of_Publication": "publication date",
41
+ "Intro": "introduction summary",
42
+ "Literature_Review": "literature review summary",
43
+ "Research_Models_Used": "models description",
44
+ "Methodology": "methodology description",
45
+ "Discussion": "discussion summary",
46
+ "Future_Scope": "future work",
47
+ "Theory": "theoretical framework",
48
+ "Independent_Variables": "list of variables",
49
+ "nof_Independent_Variables": 0,
50
+ "Dependent_Variables": "list of variables",
51
+ "nof_Dependent_Variables": 0,
52
+ "Control_Variables": "list of variables",
53
+ "nof_Control_Variables": 0,
54
+ "Extraneous_Variables": "list of variables",
55
+ "nof_Extraneous_Variables": 0
56
+ }}
57
+ ]"""
58
+
59
+ payload = {
60
+ "model": "llama-3.1-sonar-small-128k-chat",
61
+ "messages": [
62
+ {
63
+ "role": "system",
64
+ "content": "You are a research paper analyzer that returns only valid JSON arrays.",
65
+ },
66
+ {"role": "user", "content": prompt},
67
+ ],
68
+ "temperature": 0.1,
69
+ }
70
+
71
+ try:
72
+ response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
73
+ response.raise_for_status()
74
+ content = response.json()["choices"][0]["message"]["content"]
75
+
76
+ # Clean response and ensure it's valid JSON
77
+ content = content.strip()
78
+ if not content.startswith("["):
79
+ content = content[content.find("[") :]
80
+ if not content.endswith("]"):
81
+ content = content[: content.rfind("]") + 1]
82
+
83
+ # Validate JSON
84
+ papers = json.loads(content)
85
+ if not isinstance(papers, list):
86
+ raise ValueError("Response is not a JSON array")
87
+
88
+ # Insert into MongoDB
89
+ if papers:
90
+ collection.insert_many(papers)
91
+ return content
92
+ return "[]"
93
+
94
+ except json.JSONDecodeError as e:
95
+ st.error(f"Invalid JSON response: {str(e)}")
96
+ return None
97
+ except Exception as e:
98
+ st.error(f"Error: {str(e)}")
99
+ return None
100
+
101
+
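+ # A minimal usage sketch (illustrative only, assuming PERPLEXITY_API_KEY is set
+ # in .env and the MongoDB cluster above is reachable). The bracket trimming in
+ # search_papers keeps only the outermost JSON array so json.loads() still
+ # succeeds when the model wraps the array in extra prose.
+ #
+ #     raw = search_papers("flipped classroom learning analytics", 3)
+ #     if raw:
+ #         for paper in json.loads(raw):
+ #             print(paper["Title"])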
102
+ import research22
103
+ import keywords_database_download
104
+ import new_keywords
105
+ import infranew
106
+ import loldude
107
+ import new_research_paper
108
+ import research3
109
+ import entire_download
110
+
111
+
112
+ def main():
113
+ st.set_page_config(page_title="Research Papers", layout="wide")
114
+
115
+ st.title("Research Papers")
116
+
117
+ # Sidebar radio
118
+ option = st.sidebar.radio(
119
+ "Select an option",
120
+ [
121
+ "Search Papers",
122
+ "Upload Paper",
123
+ "Single Keyword Search",
124
+ "Multiple Keywords Search",
125
+ "Knowledge Graph",
126
+ "Cosine Similarity",
127
+ "Paper Generator",
128
+ "Paper from Topic",
129
+ "Download Entire Corpus",
130
+ ],
131
+ )
132
+
133
+ if option == "Search Papers":
134
+ st.subheader("Search and Store Papers")
135
+
136
+ topic = st.text_input("Enter research topic")
137
+ num_papers = st.number_input(
138
+ "Number of papers", min_value=1, max_value=10, value=5
139
+ )
140
+
141
+ if st.button("Search and Store"):
142
+ if topic:
143
+ with st.spinner(f"Searching and storing papers about {topic}..."):
144
+ results = search_papers(topic, num_papers)
145
+ if results:
146
+ st.success(
147
+ f"Successfully stored {num_papers} papers in MongoDB"
148
+ )
149
+ # Display results
150
+ papers = json.loads(results)
151
+ for paper in papers:
152
+ with st.expander(paper["Title"]):
153
+ for key, value in paper.items():
154
+ if key != "Title":
155
+ st.write(f"**{key}:** {value}")
156
+ else:
157
+ st.warning("Please enter a research topic")
158
+
159
+ # Add MongoDB connection status
160
+ if st.sidebar.button("Check Database Connection"):
161
+ try:
162
+ client.admin.command("ping")
163
+ print(MONGODB_URI)
164
+ st.sidebar.success("Connected to MongoDB")
165
+ except Exception as e:
166
+ st.sidebar.error(f"MongoDB Connection Error: {str(e)}")
167
+ elif option == "Single Keyword Search":
168
+ keywords_database_download.main()
169
+ elif option == "Multiple Keywords Search":
170
+ new_keywords.main()
171
+ elif option == "Knowledge Graph":
172
+ infranew.main()
173
+ elif option == "Cosine Similarity":
174
+ loldude.main()
175
+ elif option == "Paper Generator":
176
+ new_research_paper.main()
177
+ elif option == "Paper from Topic":
178
+ research3.main()
179
+ elif option == "Download Entire Corpus":
180
+ entire_download.main()
181
+ else:
182
+ # st.subheader("Blank Page")
183
+ # st.write("This is a placeholder for alternative content.")
184
+ research22.main()
185
+
186
+
187
+ if __name__ == "__main__":
188
+ main()
research_combine2.py ADDED
@@ -0,0 +1,269 @@
1
+ import new_research_paper
2
+ import research3
3
+ import entire_download
4
+ import streamlit as st
5
+ import os
6
+ import json
7
+ import requests
8
+ from dotenv import load_dotenv
9
+ from pymongo import MongoClient
10
+ from typing import Dict, Any
11
+ import research22
12
+ import keywords_database_download
13
+ import new_keywords
14
+ import infranew
15
+ import loldude
16
+ import new_research_paper
17
+ import research3
18
+ import entire_download
19
+ import sciclone
20
+ import extract
21
+
22
+ # Load environment variables
23
+ load_dotenv()
24
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
25
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
26
+ MONGODB_URI = os.getenv(
27
+     "MONGODB_URI",
28
+ "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
29
+ )
30
+
31
+ # MongoDB setup
32
+ client = MongoClient(MONGODB_URI)
33
+ db = client["novascholar_db"]
34
+
35
+
36
+ def search_papers(topic: str, num_papers: int, paper_type: str) -> str:
37
+ headers = {
38
+ "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
39
+ "Content-Type": "application/json",
40
+ }
41
+
42
+ attributes = {
43
+ "Review Based Paper": [
44
+ "Title",
45
+ "Publication",
46
+ "Journal_Conference",
47
+ "Abstract",
48
+ "Keywords",
49
+ "Author",
50
+ "Date_of_Publication",
51
+ "Intro",
52
+ "Literature_Review",
53
+ "Body",
54
+ "Protocol",
55
+ "Search String",
56
+ "Included Studies",
57
+ "Data Collection and Analysis Methods",
58
+ "Data Extraction Table",
59
+ "Synthesis and Analysis",
60
+ "Conclusion",
61
+ "Limitations",
62
+ "Results",
63
+ "References",
64
+ "Risk of Bias Assessment",
65
+ ],
66
+ "Opinion/Perspective Based Paper": [
67
+ "Title",
68
+ "Publication",
69
+ "Journal_Conference",
70
+ "Abstract",
71
+ "Keywords",
72
+ "Author",
73
+ "Date_of_Publication",
74
+ "Intro",
75
+ "Literature_Review",
76
+ "Introduction",
77
+ "Body",
78
+ "Results and Discussion",
79
+ "Conclusion",
80
+ "References",
81
+ ],
82
+ "Empirical Research Paper": [
83
+ "Title",
84
+ "Publication",
85
+ "Journal_Conference",
86
+ "Abstract",
87
+ "Keywords",
88
+ "Author",
89
+ "Date_of_Publication",
90
+ "Intro",
91
+ "Literature_Review",
92
+ "Introduction",
93
+ "Body",
94
+ "Methodology",
95
+ "Participants",
96
+ "Survey Instrument",
97
+ "Data Collection",
98
+ "Data Analysis",
99
+ "Results and Discussion",
100
+ "Conclusion",
101
+ "References",
102
+ ],
103
+ "Research Paper (Other)": [
104
+ "Title",
105
+ "Publication",
106
+ "Journal_Conference",
107
+ "Abstract",
108
+ "Keywords",
109
+ "Author",
110
+ "Date_of_Publication",
111
+ "Intro",
112
+ "Literature_Review",
113
+ "Research_Models_Used",
114
+ "Methodology",
115
+ "Discussion",
116
+ "Future_Scope",
117
+ "Theory",
118
+ "Independent_Variables",
119
+ "nof_Independent_Variables",
120
+ "Dependent_Variables",
121
+ "nof_Dependent_Variables",
122
+ "Control_Variables",
123
+ "Extraneous_Variables",
124
+ "nof_Control_Variables",
125
+ "nof_Extraneous_Variables",
126
+ ],
127
+ }
128
+
129
+ selected_attributes = attributes[paper_type]
130
+ prompt = f"""Find {num_papers} recent research papers about {topic}.
131
+ Return ONLY a valid JSON array with the following structure for each paper, no additional text:
132
+ [{{
133
+ {", ".join([f'"{attr}": "value"' for attr in selected_attributes])}
134
+ }}]"""
135
+
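+     # Illustrative only: for paper_type "Opinion/Perspective Based Paper" the
+     # template above expands to a JSON object skeleton such as
+     #     {"Title": "value", "Publication": "value", ..., "References": "value"}
+     # with one string field per attribute listed for that paper type.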
136
+ payload = {
137
+ "model": "llama-3.1-sonar-small-128k-chat",
138
+ "messages": [
139
+ {
140
+ "role": "system",
141
+ "content": "You are a research paper analyzer that returns only valid JSON arrays.",
142
+ },
143
+ {"role": "user", "content": prompt},
144
+ ],
145
+ "temperature": 0.1,
146
+ }
147
+
148
+ try:
149
+ response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
150
+ response.raise_for_status()
151
+ content = response.json()["choices"][0]["message"]["content"]
152
+
153
+ # Clean response and ensure it's valid JSON
154
+ content = content.strip()
155
+ if not content.startswith("["):
156
+ content = content[content.find("[") :]
157
+ if not content.endswith("]"):
158
+ content = content[: content.rfind("]") + 1]
159
+
160
+ # Validate JSON
161
+ papers = json.loads(content)
162
+ if not isinstance(papers, list):
163
+ raise ValueError("Response is not a JSON array")
164
+
165
+ # Insert into MongoDB
166
+ collection = db[paper_type.replace(" ", "_").lower()]
167
+ if papers:
168
+ collection.insert_many(papers)
169
+ return content
170
+ return "[]"
171
+
172
+ except json.JSONDecodeError as e:
173
+ st.error(f"Invalid JSON response: {str(e)}")
174
+ return None
175
+ except Exception as e:
176
+ st.error(f"Error: {str(e)}")
177
+ return None
178
+
179
+
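+ # A minimal usage sketch (illustrative only, assuming the same environment as
+ # above): each paper type lands in its own collection, e.g.
+ #     search_papers("peer instruction", 3, "Review Based Paper")
+ # inserts the parsed papers into novascholar_db["review_based_paper"].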
180
+ def display_research_assistant_dashboard():
181
+ #st.set_page_config(page_title="Research Papers", layout="wide")
182
+
183
+ # st.title("Research Papers")
184
+
185
+ # Sidebar radio
186
+ option = st.sidebar.radio(
187
+ "Select an option",
188
+ [
189
+ "Search Papers",
190
+ "Upload Paper",
191
+ "Single Keyword Search",
192
+ "Multiple Keywords Search",
193
+ "Knowledge Graph",
194
+ "Cosine Similarity",
195
+ "Paper Generator",
196
+ "Paper from Topic",
197
+ "Download Entire Corpus",
198
+ "Research Copilot",
199
+ "Research Paper Analysis Tool",
200
+ ],
201
+ )
202
+
203
+ if option == "Search Papers":
204
+ st.subheader("Search and Store Papers")
205
+
206
+ topic = st.text_input("Enter research topic")
207
+ num_papers = st.number_input(
208
+ "Number of papers", min_value=1, max_value=10, value=5
209
+ )
210
+ paper_type = st.selectbox(
211
+ "Select type of research paper",
212
+ [
213
+ "Review Based Paper",
214
+ "Opinion/Perspective Based Paper",
215
+ "Empirical Research Paper",
216
+ "Research Paper (Other)",
217
+ ],
218
+ )
219
+
220
+ if st.button("Search and Store"):
221
+ if topic:
222
+ with st.spinner(f"Searching and storing papers about {topic}..."):
223
+ results = search_papers(topic, num_papers, paper_type)
224
+ if results:
225
+ st.success(
226
+ f"Successfully stored {num_papers} papers in MongoDB"
227
+ )
228
+ # Display results
229
+ papers = json.loads(results)
230
+ for paper in papers:
231
+ with st.expander(paper["Title"]):
232
+ for key, value in paper.items():
233
+ if key != "Title":
234
+ st.write(f"**{key}:** {value}")
235
+ else:
236
+ st.warning("Please enter a research topic")
237
+
238
+ # Add MongoDB connection status
239
+ if st.sidebar.button("Check Database Connection"):
240
+ try:
241
+ client.admin.command("ping")
242
+ print(MONGODB_URI)
243
+ st.sidebar.success("Connected to MongoDB")
244
+ except Exception as e:
245
+ st.sidebar.error(f"MongoDB Connection Error: {str(e)}")
246
+ elif option == "Single Keyword Search":
247
+ keywords_database_download.main()
248
+ elif option == "Multiple Keywords Search":
249
+ new_keywords.main()
250
+ elif option == "Knowledge Graph":
251
+ infranew.main()
252
+ elif option == "Cosine Similarity":
253
+ loldude.main()
254
+ elif option == "Paper Generator":
255
+ new_research_paper.main()
256
+ elif option == "Paper from Topic":
257
+ research3.main()
258
+ elif option == "Download Entire Corpus":
259
+ entire_download.main()
260
+ elif option == "Research Copilot":
261
+ sciclone.main()
262
+ elif option == "Research Paper Analysis Tool":
263
+ extract.main()
264
+ else:
265
+ research22.main()
266
+
267
+
268
+ if __name__ == "__main__":
269
+ display_research_assistant_dashboard()
sciclone.py ADDED
@@ -0,0 +1,466 @@
1
+ import streamlit as st
2
+ import requests
3
+ import PyPDF2
4
+ from typing import Optional, Dict, List
5
+ import json
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from concurrent.futures import ThreadPoolExecutor
8
+ import xml.etree.ElementTree as ET
9
+ import re
10
+ from datetime import datetime
11
+ import time
12
+ from dotenv import load_dotenv
13
+ import os
14
+ import pandas as pd
15
+
16
+ # Load environment variables
17
+ load_dotenv()
18
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
19
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
20
+ SAPLING_API_KEY = os.getenv("SAPLING_API_KEY")
21
+
22
+
23
+ def call_perplexity_api(prompt: str) -> str:
24
+ """Call Perplexity AI with a prompt, return the text response if successful."""
25
+ headers = {
26
+ "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
27
+ "Content-Type": "application/json",
28
+ }
29
+
30
+ payload = {
31
+ "model": "llama-3.1-sonar-small-128k-chat",
32
+ "messages": [{"role": "user", "content": prompt}],
33
+ "temperature": 0.3,
34
+ }
35
+
36
+ try:
37
+ response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
38
+ response.raise_for_status()
39
+ return response.json()["choices"][0]["message"]["content"]
40
+ except Exception as e:
41
+ st.error(f"API Error: {str(e)}")
42
+ return ""
43
+
44
+
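+ # A minimal usage sketch (illustrative only, assuming PERPLEXITY_API_KEY is set):
+ #     answer = call_perplexity_api("Summarize the flipped classroom model in two sentences.")
+ #     print(answer or "API call failed")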
45
+ def extract_text_from_pdf(pdf_file):
46
+ """Extract text content from a PDF file."""
47
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
48
+ text = ""
49
+ for page in pdf_reader.pages:
50
+         text += (page.extract_text() or "") + "\n"  # extract_text() can return None for image-only pages
51
+ return text
52
+
53
+
54
+ def analyze_paper(text: str, category: str) -> str:
55
+ """Generate a prompt and get analysis for a specific category."""
56
+ prompts = {
57
+ "Summarized Abstract": "Extract and summarize the abstract from this research paper:",
58
+ "Results": "What are the main results and findings from this research paper:",
59
+ "Summarized Introduction": "Summarize the introduction section of this research paper:",
60
+ "Methods Used": "What are the main methods and methodologies used in this research:",
61
+ "Literature Survey": "Summarize the literature review or related work from this paper:",
62
+ "Limitations": "What are the limitations mentioned in this research:",
63
+ "Contributions": "What are the main contributions of this research:",
64
+ "Practical Implications": "What are the practical implications of this research:",
65
+ "Objectives": "What are the main objectives of this research:",
66
+ "Findings": "What are the key findings from this research:",
67
+ "Future Research": "What future research directions are suggested in this paper:",
68
+ "Dependent Variables": "What are the dependent variables studied in this research:",
69
+ "Independent Variables": "What are the independent variables studied in this research:",
70
+ "Dataset": "What dataset(s) were used in this research:",
71
+ "Problem Statement": "What is the main problem statement or research question:",
72
+ "Challenges": "What challenges were faced or addressed in this research:",
73
+ "Applications": "What are the potential applications of this research:",
74
+ }
75
+
76
+ prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}" # Limit text to avoid token limits
77
+ return call_perplexity_api(prompt)
78
+
79
+
80
+ class ResearchAssistant:
81
+ def __init__(self, perplexity_key: str):
82
+ self.perplexity_key = perplexity_key
83
+
84
+ def chat_with_pdf(self, pdf_text: str, query: str) -> Dict:
85
+ chunks = self._split_text(pdf_text)
86
+ relevant_chunks = self._get_relevant_chunks(chunks, query)
87
+
88
+ prompt = f"Context from PDF:\n\n{relevant_chunks}\n\nQuestion: {query}"
89
+ response_text = call_perplexity_api(prompt)
90
+ return {"choices": [{"message": {"content": response_text}}]}
91
+
92
+ def generate_literature_review(self, topic: str) -> Dict:
93
+ try:
94
+ # Search arXiv for papers
95
+ papers = self._search_arxiv(topic)
96
+ if not papers:
97
+ return {"error": "No papers found on the topic"}
98
+
99
+ # Format paper information
100
+ papers_summary = "\n\n".join(
101
+ [
102
+ f"Paper: {p['title']}\nAuthors: {', '.join(p['authors'])}\nSummary: {p['summary']}"
103
+ for p in papers
104
+ ]
105
+ )
106
+
107
+ prompt = f"""Generate a comprehensive literature review on '{topic}'. Based on these papers:
108
+
109
+ {papers_summary}
110
+
111
+ Structure the review as follows:
112
+ 1. Introduction and Background
113
+ 2. Current Research Trends
114
+ 3. Key Findings and Themes
115
+ 4. Research Gaps
116
+ 5. Future Directions"""
117
+
118
+ response_text = call_perplexity_api(prompt)
119
+ return {"choices": [{"message": {"content": response_text}}]}
120
+ except Exception as e:
121
+ return {"error": f"Literature review generation failed: {str(e)}"}
122
+
123
+ def ai_writer(self, outline: str, references: List[str]) -> Dict:
124
+ prompt = f"""Write a research paper following this structure:
125
+
126
+ Outline:
127
+ {outline}
128
+
129
+ References to incorporate:
130
+ {json.dumps(references)}
131
+
132
+ Instructions:
133
+ - Follow academic writing style
134
+ - Include appropriate citations
135
+ - Maintain logical flow
136
+ - Include introduction and conclusion"""
137
+
138
+ response_text = call_perplexity_api(prompt)
139
+ return {"choices": [{"message": {"content": response_text}}]}
140
+
141
+ def refine_response(self, response: str, column: str) -> str:
142
+ prompt = f"""Refine the following response to fit the '{column}' column in a research paper CSV format:
143
+
144
+ Response: {response}
145
+
146
+ Ensure the response is clear, concise, and fits the context of the column."""
147
+
148
+ refined_response = call_perplexity_api(prompt)
149
+ return refined_response
150
+
151
+ def paraphrase(self, text: str) -> Dict:
152
+ prompt = f"""Paraphrase the following text while:
153
+ - Maintaining academic tone
154
+ - Preserving key meaning
155
+ - Improving clarity
156
+
157
+ Text: {text}"""
158
+
159
+ response_text = call_perplexity_api(prompt)
160
+ return {"choices": [{"message": {"content": response_text}}]}
161
+
162
+ def generate_citation(self, paper_info: Dict, style: str = "APA") -> Dict:
163
+ prompt = f"""Generate a {style} citation for:
164
+ Title: {paper_info['title']}
165
+ Authors: {', '.join(paper_info['authors'])}
166
+ Year: {paper_info['year']}
167
+
168
+ Follow exact {style} format guidelines."""
169
+
170
+ response_text = call_perplexity_api(prompt)
171
+ return {"citation": response_text}
172
+
173
+ def detect_ai_content(self, text: str) -> Dict:
174
+ prompt = f"""You are an AI content detector. Analyze the text for:
175
+ 1. Writing style consistency
176
+ 2. Language patterns
177
+ 3. Contextual coherence
178
+ 4. Common AI patterns
179
+ Provide a clear analysis with confidence level.
180
+
181
+ Text: {text}"""
182
+
183
+ response = requests.post(
184
+ "https://api.sapling.ai/api/v1/aidetect",
185
+ json={"key": SAPLING_API_KEY, "text": text},
186
+ )
187
+ st.info(
188
+ "A score from 0 to 1 will be returned, with 0 indicating the maximum confidence that the text is human-written, and 1 indicating the maximum confidence that the text is AI-generated."
189
+ )
190
+
191
+ if response.status_code == 200:
192
+ return {"choices": [{"message": {"content": response.json()}}]}
193
+ else:
194
+ return {
195
+ "error": f"Sapling API Error: {response.status_code} - {response.text}"
196
+ }
197
+
198
+ def _split_text(self, text: str) -> List[str]:
199
+ splitter = RecursiveCharacterTextSplitter(
200
+ chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", ". ", " ", ""]
201
+ )
202
+ return splitter.split_text(text)
203
+
204
+ def _get_relevant_chunks(self, chunks: List[str], query: str) -> str:
205
+ # Simple keyword-based relevance scoring
206
+ query_words = set(query.lower().split())
207
+ scored_chunks = []
208
+
209
+ for chunk in chunks:
210
+ chunk_words = set(chunk.lower().split())
211
+ score = len(query_words.intersection(chunk_words))
212
+ scored_chunks.append((score, chunk))
213
+
214
+ scored_chunks.sort(reverse=True)
215
+ return "\n\n".join(chunk for _, chunk in scored_chunks[:3])
216
+
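+     # Note: relevance above is a plain word-overlap count rather than an
+     # embedding search. For the query "exception handling", a chunk containing
+     # both words scores 2 and outranks chunks containing neither; only the top
+     # three chunks are passed to the model as context.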
217
+ def _search_arxiv(self, topic: str) -> List[Dict]:
218
+ try:
219
+ query = "+AND+".join(topic.split())
220
+ url = f"http://export.arxiv.org/api/query?search_query=all:{query}&start=0&max_results=5"
221
+ response = requests.get(url, timeout=10)
222
+ response.raise_for_status()
223
+ return self._parse_arxiv_response(response.text)
224
+ except Exception as e:
225
+ print(f"arXiv search failed: {str(e)}")
226
+ return []
227
+
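+     # Illustrative only: a topic of "flipped classroom" produces the request
+     # http://export.arxiv.org/api/query?search_query=all:flipped+AND+classroom&start=0&max_results=5
+     # whose Atom feed is parsed by _parse_arxiv_response below.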
228
+ def _parse_arxiv_response(self, response_text: str) -> List[Dict]:
229
+ try:
230
+ root = ET.fromstring(response_text)
231
+ papers = []
232
+ for entry in root.findall("{http://www.w3.org/2005/Atom}entry"):
233
+ paper = {
234
+ "id": entry.find("{http://www.w3.org/2005/Atom}id").text,
235
+ "title": entry.find(
236
+ "{http://www.w3.org/2005/Atom}title"
237
+ ).text.strip(),
238
+ "summary": entry.find(
239
+ "{http://www.w3.org/2005/Atom}summary"
240
+ ).text.strip(),
241
+ "authors": [
242
+ author.find("{http://www.w3.org/2005/Atom}name").text.strip()
243
+ for author in entry.findall(
244
+ "{http://www.w3.org/2005/Atom}author"
245
+ )
246
+ ],
247
+ "published": entry.find(
248
+ "{http://www.w3.org/2005/Atom}published"
249
+ ).text[:10],
250
+ }
251
+ papers.append(paper)
252
+ return papers
253
+ except Exception as e:
254
+ print(f"arXiv response parsing failed: {str(e)}")
255
+ return []
256
+
257
+
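+ # A minimal usage sketch (illustrative only, assuming the API keys above are set):
+ #     assistant = ResearchAssistant(PERPLEXITY_API_KEY)
+ #     review = assistant.generate_literature_review("flipped classroom")
+ #     if "error" not in review:
+ #         print(review["choices"][0]["message"]["content"])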
258
+ def main():
259
+ # st.set_page_config(page_title="Research Assistant", layout="wide")
260
+ st.title("Research Copilot")
261
+
262
+ if not PERPLEXITY_API_KEY:
263
+ st.warning("Perplexity API key not found in environment variables.")
264
+ return
265
+
266
+ assistant = ResearchAssistant(PERPLEXITY_API_KEY)
267
+
268
+ tabs = st.tabs(
269
+ [
270
+ "Chat with PDF",
271
+ "Literature Review",
272
+ "AI Writer",
273
+ "Extract Data",
274
+ "Paraphraser",
275
+ "Citation Generator",
276
+ "AI Detector",
277
+ ]
278
+ )
279
+
280
+ with tabs[0]: # Chat with PDF
281
+ st.header("Chat with PDF")
282
+
283
+ # File uploader with clear button
284
+ col1, col2 = st.columns([3, 1])
285
+ with col1:
286
+ uploaded_file = st.file_uploader("Upload PDF", type="pdf", key="pdf_chat")
287
+ with col2:
288
+ if st.button("Clear PDF"):
289
+ st.session_state.pop("pdf_text", None)
290
+ st.rerun()
291
+
292
+ if uploaded_file:
293
+ if "pdf_text" not in st.session_state:
294
+ with st.spinner("Processing PDF..."):
295
+ reader = PyPDF2.PdfReader(uploaded_file)
296
+ st.session_state.pdf_text = ""
297
+ for page in reader.pages:
298
+                         st.session_state.pdf_text += page.extract_text() or ""  # guard against pages with no extractable text
299
+ st.success("PDF processed successfully!")
300
+
301
+ query = st.text_input("Ask a question about the PDF")
302
+ if query:
303
+ with st.spinner("Analyzing..."):
304
+ response = assistant.chat_with_pdf(st.session_state.pdf_text, query)
305
+ if "error" in response:
306
+ st.error(response["error"])
307
+ else:
308
+ st.write(response["choices"][0]["message"]["content"])
309
+
310
+ with tabs[1]: # Literature Review
311
+ st.header("Literature Review")
312
+ topic = st.text_input("Enter research topic")
313
+ if st.button("Generate Review") and topic:
314
+ with st.spinner("Generating literature review..."):
315
+ review = assistant.generate_literature_review(topic)
316
+ if "error" in review:
317
+ st.error(review["error"])
318
+ else:
319
+ st.write(review["choices"][0]["message"]["content"])
320
+
321
+ with tabs[2]: # AI Writer
322
+ st.header("AI Writer")
323
+ outline = st.text_area("Enter paper outline")
324
+ references = st.text_area("Enter references (one per line)")
325
+ if st.button("Generate Paper") and outline:
326
+ with st.spinner("Writing paper..."):
327
+ paper = assistant.ai_writer(outline, references.split("\n"))
328
+ if "error" in paper:
329
+ st.error(paper["error"])
330
+ else:
331
+ st.write(paper["choices"][0]["message"]["content"])
332
+
333
+ with tabs[3]: # Extract Data
334
+ st.header("Extract Data")
335
+
336
+ uploaded_files = st.file_uploader(
337
+ "Upload multiple PDF files", type="pdf", accept_multiple_files=True
338
+ )
339
+
340
+ if uploaded_files:
341
+ if st.button("Process Papers"):
342
+ # Initialize progress bar
343
+ progress_bar = st.progress(0)
344
+ status_text = st.empty()
345
+
346
+ # Initialize results dictionary
347
+ results = []
348
+
349
+ # Define categories
350
+ categories = [
351
+ "Summarized Abstract",
352
+ "Results",
353
+ "Summarized Introduction",
354
+ "Methods Used",
355
+ "Literature Survey",
356
+ "Limitations",
357
+ "Contributions",
358
+ "Practical Implications",
359
+ "Objectives",
360
+ "Findings",
361
+ "Future Research",
362
+ "Dependent Variables",
363
+ "Independent Variables",
364
+ "Dataset",
365
+ "Problem Statement",
366
+ "Challenges",
367
+ "Applications",
368
+ ]
369
+
370
+ # Process each file
371
+ for i, file in enumerate(uploaded_files):
372
+ status_text.text(f"Processing {file.name}...")
373
+
374
+ # Extract text from PDF
375
+ text = extract_text_from_pdf(file)
376
+
377
+ # Initialize paper results
378
+ paper_results = {"Filename": file.name}
379
+
380
+ # Analyze each category
381
+ for j, category in enumerate(categories):
382
+ status_text.text(f"Processing {file.name} - {category}")
383
+ paper_results[category] = analyze_paper(text, category)
384
+
385
+ # Update progress
386
+ progress = (i * len(categories) + j + 1) / (
387
+ len(uploaded_files) * len(categories)
388
+ )
389
+ progress_bar.progress(progress)
390
+
391
+ # Add small delay to avoid API rate limits
392
+ time.sleep(1)
393
+
394
+ results.append(paper_results)
395
+
396
+ # Create DataFrame
397
+ df = pd.DataFrame(results)
398
+
399
+ # Convert DataFrame to CSV
400
+ csv = df.to_csv(index=False)
401
+
402
+ # Create download button
403
+ st.download_button(
404
+ label="Download Results as CSV",
405
+ data=csv,
406
+ file_name="research_papers_analysis.csv",
407
+ mime="text/csv",
408
+ )
409
+
410
+ # Display results in the app
411
+ st.subheader("Analysis Results")
412
+ st.dataframe(df)
413
+
414
+ status_text.text("Processing complete!")
415
+ progress_bar.progress(1.0)
416
+
417
+ with tabs[4]: # Paraphraser
418
+ st.header("Paraphraser")
419
+ text = st.text_area("Enter text to paraphrase")
420
+ if st.button("Paraphrase") and text:
421
+ with st.spinner("Paraphrasing..."):
422
+ result = assistant.paraphrase(text)
423
+ if "error" in result:
424
+ st.error(result["error"])
425
+ else:
426
+ st.write(result["choices"][0]["message"]["content"])
427
+
428
+ with tabs[5]: # Citation Generator
429
+ st.header("Citation Generator")
430
+ col1, col2 = st.columns(2)
431
+ with col1:
432
+ title = st.text_input("Paper Title")
433
+ authors = st.text_input("Authors (comma-separated)")
434
+ with col2:
435
+ year = st.text_input("Year")
436
+ style = st.selectbox("Citation Style", ["APA", "MLA", "Chicago"])
437
+
438
+ if st.button("Generate Citation") and title:
439
+ with st.spinner("Generating citation..."):
440
+ citation = assistant.generate_citation(
441
+ {
442
+ "title": title,
443
+ "authors": [a.strip() for a in authors.split(",")],
444
+ "year": year,
445
+ },
446
+ style,
447
+ )
448
+ if "error" in citation:
449
+ st.error(citation["error"])
450
+ else:
451
+ st.code(citation["citation"], language="text")
452
+
453
+ with tabs[6]: # AI Detector
454
+ st.header("AI Detector")
455
+ text = st.text_area("Enter text to analyze")
456
+ if st.button("Detect AI Content") and text:
457
+ with st.spinner("Analyzing..."):
458
+ result = assistant.detect_ai_content(text)
459
+ if "error" in result:
460
+ st.error(result["error"])
461
+ else:
462
+ st.write(result["choices"][0]["message"]["content"])
463
+
464
+
465
+ if __name__ == "__main__":
466
+ main()
session_page.py ADDED
The diff for this file is too large to render. See raw diff
 
ui.py ADDED
@@ -0,0 +1,111 @@
1
+ import streamlit as st
2
+ from streamlit_option_menu import option_menu
3
+
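+ # Requires the third-party streamlit-option-menu package
+ # (pip install streamlit-option-menu) in addition to Streamlit itself.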
4
+
5
+ # Page Configuration
6
+ st.set_page_config(page_title="Enhanced Navigation Demo", layout="wide")
7
+
8
+ # Top Navigation Bar using option_menu
9
+ selected = option_menu(
10
+ menu_title=None,
11
+ options=["Home", "Documentation", "Examples", "Community", "About"],
12
+ icons=["house", "book", "code", "people", "info-circle"],
13
+ menu_icon="cast",
14
+ default_index=0,
15
+ orientation="horizontal",
16
+ styles={
17
+ "container": {"padding": "0!important", "background-color": "#fafafa"},
18
+ "icon": {"color": "orange", "font-size": "25px"},
19
+ "nav-link": {
20
+ "font-size": "15px",
21
+ "text-align": "center",
22
+ "margin":"0px",
23
+ "--hover-color": "#eee",
24
+ },
25
+ "nav-link-selected": {"background-color": "#0083B8"},
26
+ }
27
+ )
28
+
29
+ # Sidebar Navigation
30
+ with st.sidebar:
31
+ st.header("Navigation Menu")
32
+
33
+ # Main Menu Items
34
+ selected_side = option_menu(
35
+ menu_title="Go to",
36
+ options=["Dashboard", "Analytics", "Reports", "Settings"],
37
+ icons=["speedometer2", "graph-up", "file-text", "gear"],
38
+ menu_icon="list",
39
+ default_index=0,
40
+ )
41
+
42
+ # Expandable Reports Section
43
+ if selected_side == "Reports":
44
+ with st.expander("Reports", expanded=True):
45
+ st.button("Weekly Report")
46
+ st.button("Monthly Report")
47
+ st.button("Annual Report")
48
+
49
+ # Main Content Area based on top navigation
50
+ if selected == "Home":
51
+ st.title("Welcome to Home")
52
+ st.write("This is the home page content.")
53
+
54
+ # Dashboard Content
55
+ st.header("Dashboard")
56
+ col1, col2, col3 = st.columns(3)
57
+ with col1:
58
+ st.metric("Sales", "$12,345", "+2.5%")
59
+ with col2:
60
+ st.metric("Users", "1,234", "-8%")
61
+ with col3:
62
+ st.metric("Conversion", "3.2%", "+1.2%")
63
+
64
+ elif selected == "Documentation":
65
+ st.title("Documentation")
66
+ st.write("Documentation content goes here.")
67
+
68
+ elif selected == "Examples":
69
+ st.title("Examples")
70
+ st.write("Example content goes here.")
71
+
72
+ elif selected == "Community":
73
+ st.title("Community")
74
+ st.write("Community content goes here.")
75
+
76
+ elif selected == "About":
77
+ st.title("About")
78
+ st.write("About content goes here.")
79
+
80
+ # Content based on sidebar selection
81
+ if selected_side == "Analytics":
82
+ st.header("Analytics")
83
+ st.line_chart({"data": [1, 5, 2, 6, 2, 1]})
84
+ elif selected_side == "Settings":
85
+ st.header("Settings")
86
+ st.toggle("Dark Mode")
87
+ st.toggle("Notifications")
88
+ st.slider("Volume", 0, 100, 50)
89
+
90
+ # Footer
91
+ st.markdown(
92
+ """
93
+ <style>
94
+ .footer {
95
+ position: fixed;
96
+ left: 0;
97
+ bottom: 0;
98
+ width: 100%;
99
+ background-color: #0E1117;
100
+ color: white;
101
+ text-align: center;
102
+ padding: 10px;
103
+ font-size: 14px;
104
+ }
105
+ </style>
106
+ <div class='footer'>
107
+ © 2024 Your App Name • Privacy Policy • Terms of Service
108
+ </div>
109
+ """,
110
+ unsafe_allow_html=True
111
+ )
utils/helpers.py ADDED
@@ -0,0 +1,83 @@
1
+ from datetime import datetime, timedelta
2
+ import streamlit as st
3
+
4
+ def format_datetime(dt):
5
+ """Format datetime for display"""
6
+ return dt.strftime("%Y-%m-%d %H:%M")
7
+
8
+ def get_session_progress(username, course_id, session_id):
9
+ """
10
+ Get user's progress for a specific session
11
+ Returns dict with pre_class, in_class, and post_class completion status
12
+ """
13
+ # Demo implementation - replace with actual database queries
14
+ return {
15
+ 'pre_class': {
16
+ 'completed': True,
17
+ 'last_access': datetime.now() - timedelta(days=1),
18
+ 'resources_viewed': 3,
19
+ 'total_resources': 3
20
+ },
21
+ 'in_class': {
22
+ 'completed': False,
23
+ 'attendance': True,
24
+ 'quiz_completed': False,
25
+ 'questions_asked': 5
26
+ },
27
+ 'post_class': {
28
+ 'completed': False,
29
+ 'assignments_submitted': 1,
30
+ 'total_assignments': 2,
31
+ 'grade': None
32
+ }
33
+ }
34
+
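+ # Illustrative only: with the demo data above,
+ #     get_session_progress("user1", "CS101", 1)["pre_class"]["completed"]        # -> True
+ #     get_session_progress("user1", "CS101", 1)["post_class"]["assignments_submitted"]  # -> 1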
35
+ def get_course_sessions(course_id):
36
+ """Get all sessions for a course"""
37
+ # Demo implementation - replace with database query
38
+ return [
39
+ {
40
+ 'id': 1,
41
+ 'title': 'Introduction to Programming Concepts',
42
+ 'date': datetime.now() + timedelta(days=i),
43
+ 'status': 'completed' if i < 0 else 'upcoming'
44
+ }
45
+ for i in range(-2, 5)
46
+ ]
47
+
48
+ def display_progress_bar(completed, total, text=""):
49
+ """Display a progress bar with text"""
50
+ progress = completed / total if total > 0 else 0
51
+ st.progress(progress)
52
+ st.text(f"{text}: {completed}/{total} ({progress*100:.1f}%)")
53
+
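+ # Illustrative only: display_progress_bar(3, 4, "Resources viewed") renders a
+ # 75% bar captioned "Resources viewed: 3/4 (75.0%)".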
54
+ def create_notification(message, type="info"):
55
+ """Create a notification message"""
56
+ if type == "success":
57
+ st.success(message)
58
+ elif type == "error":
59
+ st.error(message)
60
+ elif type == "warning":
61
+ st.warning(message)
62
+ else:
63
+ st.info(message)
64
+
65
+ class SessionManager:
66
+ """Manage session state and navigation"""
67
+ @staticmethod
68
+ def get_current_session():
69
+ """Get current session information"""
70
+ if 'current_session' not in st.session_state:
71
+ st.session_state.current_session = 1
72
+ return st.session_state.current_session
73
+
74
+ @staticmethod
75
+ def set_current_session(session_id):
76
+ """Set current session"""
77
+ st.session_state.current_session = session_id
78
+
79
+ @staticmethod
80
+ def clear_session():
81
+ """Clear session state"""
82
+ for key in list(st.session_state.keys()):
83
+ del st.session_state[key]
utils/sample_data.py ADDED
@@ -0,0 +1,226 @@
1
+ from datetime import datetime, timedelta
2
+
3
+ SAMPLE_COURSES = [
4
+ {
5
+ 'course_id': 'CS101',
6
+ 'title': 'Introduction to Computer Science',
7
+ 'description': 'This course covers the basics of computer science and programming.',
8
+ 'instructor': 'Dr. John Doe',
9
+ 'duration': '10 weeks'
10
+ },
11
+ {
12
+ 'course_id': 'CS102',
13
+ 'title': 'Data Structures and Algorithms',
14
+ 'description': 'This course introduces data structures and algorithms for efficient data processing.',
15
+ 'instructor': 'Dr. Jane Smith',
16
+ 'duration': '12 weeks'
17
+ },
18
+ {
19
+ 'course_id': 'CS103',
20
+ 'title': 'Advanced Python Programming',
21
+ 'description': 'This course covers advanced topics in Python programming, including file handling and exception management.',
22
+ 'instructor': 'Dr. Emily Johnson',
23
+ 'duration': '8 weeks'
24
+ }
25
+ ]
26
+
27
+ SAMPLE_SESSIONS = [
28
+ {
29
+ 'id': 1,
30
+ 'course_id': 'CS101',
31
+ 'title': 'Introduction to Programming Fundamentals',
32
+ 'date': datetime.now() - timedelta(days=7),
33
+ 'status': 'completed',
34
+ 'pre_class': {
35
+ 'resources': [
36
+ {'type': 'pdf', 'title': 'Introduction to Python Basics', 'url': '/assets/python_basics.pdf'},
37
+ {'type': 'video', 'title': 'Programming Fundamentals', 'duration': '15:00'},
38
+ {'type': 'reading', 'title': 'Chapter 1: Getting Started', 'pages': '1-15'}
39
+ ],
40
+ 'completion_required': True
41
+ },
42
+ 'in_class': {
43
+ 'topics': ['Variables', 'Data Types', 'Basic Operations'],
44
+ 'quiz': {
45
+ 'title': 'Python Basics Quiz',
46
+ 'questions': 5,
47
+ 'duration': 15
48
+ },
49
+ 'polls': [
50
+ {'question': 'How comfortable are you with Python syntax?', 'options': ['Very', 'Somewhat', 'Not at all']}
51
+ ]
52
+ },
53
+ 'post_class': {
54
+ 'assignments': [
55
+ {
56
+ 'id': 1,
57
+ 'title': 'Basic Python Programs',
58
+ 'due_date': datetime.now() + timedelta(days=2),
59
+ 'status': 'pending'
60
+ }
61
+ ]
62
+ }
63
+ },
64
+ {
65
+ 'id': 2,
66
+ 'course_id': 'CS101',
67
+ 'title': 'Control Flow and Functions',
68
+ 'date': datetime.now() - timedelta(days=3),
69
+ 'status': 'completed',
70
+ 'pre_class': {
71
+ 'resources': [
72
+ {'type': 'pdf', 'title': 'Control Flow in Python', 'url': '/assets/control_flow.pdf'},
73
+ {'type': 'video', 'title': 'Functions and Methods', 'duration': '20:00'}
74
+ ],
75
+ 'completion_required': True
76
+ },
77
+ 'in_class': {
78
+ 'topics': ['If-else statements', 'Loops', 'Function definitions'],
79
+ 'quiz': {
80
+ 'title': 'Control Flow Quiz',
81
+ 'questions': 8,
82
+ 'duration': 20
83
+ },
84
+ 'polls': [
85
+ {'question': 'Which loop type do you find more intuitive?', 'options': ['For loops', 'While loops', 'Both']}
86
+ ]
87
+ },
88
+ 'post_class': {
89
+ 'assignments': [
90
+ {
91
+ 'id': 2,
92
+ 'title': 'Function Implementation Exercise',
93
+ 'due_date': datetime.now() + timedelta(days=4),
94
+ 'status': 'pending'
95
+ }
96
+ ]
97
+ }
98
+ },
99
+ {
100
+ 'id': 3,
101
+ 'course_id': 'CS102',
102
+ 'title': 'Data Structures',
103
+ 'date': datetime.now(),
104
+ 'status': 'in_progress',
105
+ 'pre_class': {
106
+ 'resources': [
107
+ {'type': 'pdf', 'title': 'Python Data Structures', 'url': '/assets/data_structures.pdf'},
108
+ {'type': 'video', 'title': 'Lists and Dictionaries', 'duration': '25:00'}
109
+ ],
110
+ 'completion_required': True
111
+ },
112
+ 'in_class': {
113
+ 'topics': ['Lists', 'Tuples', 'Dictionaries', 'Sets'],
114
+ 'quiz': {
115
+ 'title': 'Data Structures Quiz',
116
+ 'questions': 10,
117
+ 'duration': 25
118
+ },
119
+ 'polls': [
120
+ {'question': 'Which data structure do you use most often?', 'options': ['Lists', 'Dictionaries', 'Sets', 'Tuples']}
121
+ ]
122
+ },
123
+ 'post_class': {
124
+ 'assignments': [
125
+ {
126
+ 'id': 3,
127
+ 'title': 'Data Structure Implementation',
128
+ 'due_date': datetime.now() + timedelta(days=7),
129
+ 'status': 'not_started'
130
+ }
131
+ ]
132
+ }
133
+ },
134
+ {
135
+ 'id': 4,
136
+ 'course_id': 'CS101',
137
+ 'title': 'Object-Oriented Programming',
138
+ 'date': datetime.now() + timedelta(days=4),
139
+ 'status': 'upcoming',
140
+ 'pre_class': {
141
+ 'resources': [
142
+ {'type': 'pdf', 'title': 'OOP Concepts', 'url': '/assets/oop_concepts.pdf'},
143
+ {'type': 'video', 'title': 'Classes and Objects', 'duration': '30:00'}
144
+ ],
145
+ 'completion_required': True
146
+ },
147
+ 'in_class': {
148
+ 'topics': ['Classes', 'Objects', 'Inheritance', 'Polymorphism'],
149
+ 'quiz': {
150
+ 'title': 'OOP Concepts Quiz',
151
+ 'questions': 12,
152
+ 'duration': 30
153
+ },
154
+ 'polls': [
155
+                 {'question': 'Have you used OOP before?', 'options': ['Yes', 'No', 'Not sure'], 'responses': {'Yes': 12, 'No': 8, 'Not sure': 10}}
156
+ ]
157
+ },
158
+ 'post_class': {
159
+ 'assignments': [
160
+ {
161
+ 'id': 4,
162
+ 'title': 'Class Implementation Project',
163
+ 'due_date': datetime.now() + timedelta(days=11),
164
+ 'status': 'not_started'
165
+ }
166
+ ]
167
+ }
168
+ },
169
+ {
170
+ 'id': 5,
171
+ 'course_id': 'CS103',
172
+ 'title': 'File Handling and Exception Management',
173
+ 'date': datetime.now() + timedelta(days=7),
174
+ 'status': 'upcoming',
175
+ 'pre_class': {
176
+ 'resources': [
177
+ {'type': 'pdf', 'title': 'File Operations in Python', 'url': '/assets/file_ops.pdf'},
178
+ {'type': 'video', 'title': 'Exception Handling', 'duration': '20:00'}
179
+ ],
180
+ 'completion_required': True
181
+ },
182
+ 'in_class': {
183
+ 'topics': ['File Operations', 'Exception Handling', 'Context Managers'],
184
+ 'quiz': {
185
+ 'title': 'File Operations Quiz',
186
+ 'questions': 8,
187
+ 'duration': 20
188
+ },
189
+ 'polls': [
190
+ {'question': 'How often do you handle exceptions in your code?',
191
+ 'options': ['Always', 'Sometimes', 'Rarely', 'Never'],
192
+                  'responses': {'Always': 10, 'Sometimes': 15, 'Rarely': 5}
193
+ }
194
+ ]
195
+ },
196
+ 'post_class': {
197
+ 'assignments': [
198
+ {
199
+ 'id': 5,
200
+ 'title': 'File Processing Application',
201
+ 'due_date': datetime.now() + timedelta(days=14),
202
+ 'status': 'not_started'
203
+ }
204
+ ]
205
+ }
206
+ }
207
+ ]
208
+
209
+ # Chatbot message history
210
+ SAMPLE_CHAT_HISTORY = {
211
+ 1: [
212
+ {'user': 'student1', 'message': 'What is the difference between list and tuple?', 'timestamp': datetime.now()},
213
+ {'user': 'chatbot', 'message': 'Lists are mutable (can be modified) while tuples are immutable (cannot be modified after creation).', 'timestamp': datetime.now()}
214
+ ]
215
+ }
216
+
217
+ # Student progress data
218
+ SAMPLE_STUDENT_PROGRESS = {
219
+ 'user1': {
220
+ 1: {'pre_class': 50, 'in_class': 80, 'post_class': 90},
221
+ 2: {'pre_class': 100, 'in_class': 75, 'post_class': 85},
222
+ 3: {'pre_class': 50, 'in_class': 0, 'post_class': 0},
223
+ 4: {'pre_class': 0, 'in_class': 0, 'post_class': 0},
224
+ 5: {'pre_class': 0, 'in_class': 0, 'post_class': 0}
225
+ }
226
+ }
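+ # Illustrative only: completion percentages are read per user and session, e.g.
+ #     SAMPLE_STUDENT_PROGRESS['user1'][2]['in_class']  # -> 75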