Initial Commit
Browse files- .gitignore +22 -0
- README.md +5 -7
- Research Paper Attributes.txt +98 -0
- analytics.py +97 -0
- app.py +1424 -0
- chatbot.py +67 -0
- create_course.py +272 -0
- create_course2.py +331 -0
- db.py +696 -0
- entire_download.py +90 -0
- extract.py +140 -0
- file_upload_vectorize.py +179 -0
- gen_mcqs.py +206 -0
- goals2.py +658 -0
- infranew.py +231 -0
- keywords_database_download.py +104 -0
- live_polls.py +115 -0
- loldude.py +135 -0
- modify_schema.py +222 -0
- new_keywords.py +127 -0
- new_research_paper.py +103 -0
- poll_db_operations.py +70 -0
- poll_db_setup.py +35 -0
- pre_class_analytics2.py +759 -0
- pre_class_analytics4.py +592 -0
- requirements.txt +37 -0
- research22.py +517 -0
- research3.py +110 -0
- research_assistant_dashboard.py +349 -0
- research_combine.py +188 -0
- research_combine2.py +269 -0
- sciclone.py +466 -0
- session_page.py +0 -0
- ui.py +111 -0
- utils/helpers.py +83 -0
- utils/sample_data.py +226 -0
.gitignore
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Ignore .env file
|
2 |
+
.env
|
3 |
+
__pycache__/
|
4 |
+
newenv
|
5 |
+
backupgoal.py
|
6 |
+
backupgoal2.py
|
7 |
+
backupresearch.py
|
8 |
+
goals.py
|
9 |
+
goals3.py
|
10 |
+
research_assistant_dashboard2.py
|
11 |
+
tempCodeRunnerFile.py
|
12 |
+
all_chat_histories.json
|
13 |
+
all_chat_histories2.json
|
14 |
+
analytics.ipynb
|
15 |
+
chat_history.csv
|
16 |
+
harshal.py
|
17 |
+
course_creation.py
|
18 |
+
topics.json
|
19 |
+
new_analytics.json
|
20 |
+
new_analytics2.json
|
21 |
+
pre_class_analytics.py
|
22 |
+
sample_files/
|
README.md
CHANGED
@@ -1,13 +1,11 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: streamlit
|
7 |
sdk_version: 1.41.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
|
11 |
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: NovaScholar
|
3 |
+
emoji: 🐢
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: red
|
6 |
sdk: streamlit
|
7 |
sdk_version: 1.41.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
short_description: Generative-AI powered Flipped Classroom Learning Platform
|
11 |
---
|
|
|
|
Research Paper Attributes.txt
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Review Based Paper
|
2 |
+
Title TEXT,
|
3 |
+
Publication TEXT,
|
4 |
+
Journal_Conference TEXT,
|
5 |
+
Abstract TEXT,
|
6 |
+
Keywords TEXT,
|
7 |
+
Author TEXT
|
8 |
+
Date_of_Publication TEXT,
|
9 |
+
Intro TEXT,
|
10 |
+
Literature_Review TEXT,
|
11 |
+
Body: TEXT
|
12 |
+
Protocol: TEXT
|
13 |
+
Search String: TEXT
|
14 |
+
Included Studies: TEXT
|
15 |
+
Data Collection and Analysis Methods: TEXT
|
16 |
+
Data Extraction Table: TEXT
|
17 |
+
Synthesis and Analysis: TEXT
|
18 |
+
Conclusion
|
19 |
+
Limitations
|
20 |
+
Results
|
21 |
+
References
|
22 |
+
|
23 |
+
Risk of Bias Assessment:Opinion/Perspective Based Paper
|
24 |
+
Title TEXT,
|
25 |
+
Publication TEXT,
|
26 |
+
Journal_Conference TEXT,
|
27 |
+
Abstract TEXT,
|
28 |
+
Keywords TEXT,
|
29 |
+
Author TEXT,
|
30 |
+
Date_of_Publication TEXT,
|
31 |
+
Intro TEXT,
|
32 |
+
Literature_Review TEXT
|
33 |
+
Introduction: TEXT
|
34 |
+
Body: TEXT
|
35 |
+
Results and Discussion:TEXT
|
36 |
+
Conclusion: TEXT
|
37 |
+
References: TEXT
|
38 |
+
|
39 |
+
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
|
45 |
+
|
46 |
+
|
47 |
+
|
48 |
+
|
49 |
+
|
50 |
+
|
51 |
+
|
52 |
+
|
53 |
+
|
54 |
+
Empirical Research Paper
|
55 |
+
Title TEXT,
|
56 |
+
Publication TEXT,
|
57 |
+
Journal_Conference TEXT,
|
58 |
+
Abstract TEXT,
|
59 |
+
Keywords TEXT,
|
60 |
+
Author TEXT,
|
61 |
+
Date_of_Publication TEXT,
|
62 |
+
Intro TEXT,
|
63 |
+
Literature_Review TEXT
|
64 |
+
Introduction: TEXT
|
65 |
+
Body: TEXT
|
66 |
+
Methodology: TEXT
|
67 |
+
Participants: TEXT - Describes the sample and the sampling methods used.
|
68 |
+
Survey Instrument: TEXT - Describes the design and development of the survey questionnaire.
|
69 |
+
Data Collection: TEXT - Explains how the survey data was collected.
|
70 |
+
Data Analysis: TEXT - Details the statistical techniques used to analyze the data.
|
71 |
+
|
72 |
+
|
73 |
+
Results and Discussion:TEXT
|
74 |
+
Conclusion: TEXT
|
75 |
+
References: TEXT
|
76 |
+
Research Paper (Other)
|
77 |
+
Title TEXT,
|
78 |
+
Publication TEXT,
|
79 |
+
Journal_Conference TEXT,
|
80 |
+
Abstract TEXT,
|
81 |
+
Keywords TEXT,
|
82 |
+
Author TEXT,
|
83 |
+
Date_of_Publication TEXT,
|
84 |
+
Intro TEXT,
|
85 |
+
Literature_Review TEXT,
|
86 |
+
Research_Models_Used TEXT,
|
87 |
+
Methodology TEXT,
|
88 |
+
Discussion TEXT,
|
89 |
+
Future_Scope TEXT,
|
90 |
+
Theory TEXT,
|
91 |
+
Independent_Variables TEXT,
|
92 |
+
nof_Independent_Variables INTEGER,
|
93 |
+
Dependent_Variables TEXT,
|
94 |
+
nof_Dependent_Variables INTEGER,
|
95 |
+
Control_Variables TEXT,
|
96 |
+
Extraneous_Variables TEXT,
|
97 |
+
nof_Control_Variables INTEGER,
|
98 |
+
nof_Extraneous_Variables INTEGER
|
analytics.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from numpy.linalg import norm
|
5 |
+
from pymongo import MongoClient
|
6 |
+
import openai
|
7 |
+
from openai import OpenAI
|
8 |
+
import streamlit as st
|
9 |
+
from datetime import datetime
|
10 |
+
|
11 |
+
# MongoDB connection
|
12 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
13 |
+
|
14 |
+
client = MongoClient(MONGO_URI)
|
15 |
+
db = client['digital_nova']
|
16 |
+
themes_collection = db['themes']
|
17 |
+
corpus_collection = db['corpus']
|
18 |
+
vectors_collection = db['vectors'] # Reference to 'vectors' collection
|
19 |
+
users_collection = db['users']
|
20 |
+
|
21 |
+
# Function to create embeddings
|
22 |
+
def create_embeddings(text, openai_api_key):
|
23 |
+
client = OpenAI(api_key=openai_api_key)
|
24 |
+
response = client.embeddings.create(
|
25 |
+
input=text,
|
26 |
+
model="text-embedding-3-small"
|
27 |
+
)
|
28 |
+
return response.data[0].embedding
|
29 |
+
|
30 |
+
# Function to calculate cosine similarity
|
31 |
+
def cosine_similarity(v1, v2):
|
32 |
+
v1 = np.array(v1)
|
33 |
+
v2 = np.array(v2)
|
34 |
+
dot_product = np.dot(v1, v2)
|
35 |
+
norm_product = norm(v1) * norm(v2)
|
36 |
+
return dot_product / norm_product if norm_product != 0 else 0
|
37 |
+
|
38 |
+
def derive_analytics(goal, reference_text, openai_api_key, context=None, synoptic=None):
|
39 |
+
"""
|
40 |
+
Analyze subjective answers with respect to pre-class materials and synoptic, and provide detailed feedback
|
41 |
+
|
42 |
+
Args:
|
43 |
+
goal (str): Analysis objective
|
44 |
+
reference_text (str): Student's answer text
|
45 |
+
openai_api_key (str): OpenAI API key
|
46 |
+
context (str, optional): Pre-class material content for comparison
|
47 |
+
synoptic (str, optional): Synoptic content for evaluation
|
48 |
+
"""
|
49 |
+
template = f"""Given a student's answer to a subjective question, analyze it following these specific guidelines. Compare it with the provided pre-class materials and synoptic (if available) to assess correctness and completeness.
|
50 |
+
|
51 |
+
1. Analyze the text as an experienced educational assessor, considering:
|
52 |
+
- Conceptual understanding
|
53 |
+
- Factual accuracy
|
54 |
+
- Completeness of response
|
55 |
+
- Use of relevant terminology
|
56 |
+
- Application of concepts
|
57 |
+
|
58 |
+
2. Structure the output in markdown with two sections:
|
59 |
+
|
60 |
+
**Correctness Assessment**
|
61 |
+
- Rate overall correctness on a scale of 1-10
|
62 |
+
|
63 |
+
**Evidence-Based Feedback**
|
64 |
+
- Provide specific evidence from the student's answer to justify the score reduction
|
65 |
+
- Highlight the exact lines or phrases that need improvement
|
66 |
+
|
67 |
+
Pre-class Materials Context:
|
68 |
+
{context if context else "No reference materials provided"}
|
69 |
+
|
70 |
+
Synoptic:
|
71 |
+
{synoptic if synoptic else "No synoptic provided"}
|
72 |
+
|
73 |
+
Student's Answer:
|
74 |
+
{reference_text}
|
75 |
+
|
76 |
+
Rules:
|
77 |
+
- Base assessment strictly on provided content
|
78 |
+
- Be specific in feedback and suggestions
|
79 |
+
"""
|
80 |
+
|
81 |
+
# Initialize OpenAI client
|
82 |
+
client = OpenAI(api_key=openai_api_key)
|
83 |
+
|
84 |
+
try:
|
85 |
+
response = client.chat.completions.create(
|
86 |
+
model="gpt-4-0125-preview",
|
87 |
+
messages=[
|
88 |
+
{"role": "system", "content": "You are an educational assessment expert."},
|
89 |
+
{"role": "user", "content": template}
|
90 |
+
],
|
91 |
+
temperature=0.7
|
92 |
+
)
|
93 |
+
analysis = response.choices[0].message.content
|
94 |
+
return analysis
|
95 |
+
except Exception as e:
|
96 |
+
print(f"Error in generating analysis with OpenAI: {str(e)}")
|
97 |
+
return "Error generating analysis"
|
app.py
ADDED
@@ -0,0 +1,1424 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import streamlit as st
|
3 |
+
from datetime import datetime, date, time, timedelta
|
4 |
+
from pathlib import Path
|
5 |
+
from utils.sample_data import SAMPLE_COURSES, SAMPLE_SESSIONS
|
6 |
+
from session_page import display_session_content
|
7 |
+
from db import (
|
8 |
+
courses_collection2,
|
9 |
+
faculty_collection,
|
10 |
+
students_collection,
|
11 |
+
research_assistants_collection,
|
12 |
+
analysts_collection,
|
13 |
+
)
|
14 |
+
from werkzeug.security import generate_password_hash, check_password_hash
|
15 |
+
import os
|
16 |
+
from openai import OpenAI
|
17 |
+
from dotenv import load_dotenv
|
18 |
+
from create_course2 import create_course, courses_collection, generate_perplexity_response, generate_session_resources, PERPLEXITY_API_KEY, validate_course_plan
|
19 |
+
import json
|
20 |
+
from bson import ObjectId
|
21 |
+
client = OpenAI(api_key=os.getenv("OPENAI_KEY"))
|
22 |
+
from dotenv import load_dotenv
|
23 |
+
|
24 |
+
load_dotenv()
|
25 |
+
# PERPLEXITY_API_KEY = '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'
|
26 |
+
|
27 |
+
def get_research_papers(query):
|
28 |
+
"""Get research paper recommendations based on query"""
|
29 |
+
try:
|
30 |
+
response = client.chat.completions.create(
|
31 |
+
model="gpt-3.5-turbo",
|
32 |
+
messages=[
|
33 |
+
{
|
34 |
+
"role": "system",
|
35 |
+
"content": "You are a helpful research assistant. Provide 10 relevant research papers with titles, authors, brief descriptions, and DOI/URL links. Format each paper as: \n\n1. **Title**\nAuthors: [names]\nLink: [DOI/URL]\nDescription: [brief summary]",
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"role": "user",
|
39 |
+
"content": f"Give me 10 research papers about: {query}. Include valid DOI links or URLs to the papers where available.",
|
40 |
+
},
|
41 |
+
],
|
42 |
+
)
|
43 |
+
return response.choices[0].message.content
|
44 |
+
except Exception as e:
|
45 |
+
return f"Error getting recommendations: {str(e)}"
|
46 |
+
|
47 |
+
|
48 |
+
def analyze_research_gaps(papers):
|
49 |
+
"""Analyze gaps in research based on recommended papers"""
|
50 |
+
try:
|
51 |
+
response = client.chat.completions.create(
|
52 |
+
model="gpt-3.5-turbo",
|
53 |
+
messages=[
|
54 |
+
{
|
55 |
+
"role": "system",
|
56 |
+
"content": "You are a research analysis expert. Based on the provided papers, identify potential research gaps and future research directions.",
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"role": "user",
|
60 |
+
"content": f"Based on these papers, what are the key areas that need more research?\n\nPapers:\n{papers}",
|
61 |
+
},
|
62 |
+
],
|
63 |
+
)
|
64 |
+
return response.choices[0].message.content
|
65 |
+
except Exception as e:
|
66 |
+
return f"Error analyzing research gaps: {str(e)}"
|
67 |
+
|
68 |
+
|
69 |
+
def init_session_state():
|
70 |
+
"""Initialize session state variables"""
|
71 |
+
if "authenticated" not in st.session_state:
|
72 |
+
st.session_state.authenticated = False
|
73 |
+
if "user_id" not in st.session_state:
|
74 |
+
st.session_state.user_id = None
|
75 |
+
if "user_type" not in st.session_state:
|
76 |
+
st.session_state.user_type = None
|
77 |
+
if "username" not in st.session_state:
|
78 |
+
st.session_state.username = None
|
79 |
+
if "selected_course" not in st.session_state:
|
80 |
+
st.session_state.selected_course = None
|
81 |
+
if "show_create_course_form" not in st.session_state:
|
82 |
+
st.session_state.show_create_course_form = False
|
83 |
+
if "show_create_session_form" not in st.session_state:
|
84 |
+
st.session_state.show_create_session_form = False
|
85 |
+
if "show_enroll_course_page" not in st.session_state:
|
86 |
+
st.session_state.show_enroll_course_page = False
|
87 |
+
if "course_to_enroll" not in st.session_state:
|
88 |
+
st.session_state.course_to_enroll = None
|
89 |
+
|
90 |
+
def login_user(username, password, user_type):
|
91 |
+
"""Login user based on credentials"""
|
92 |
+
if user_type == "student":
|
93 |
+
# user = students_collection.find_one({"full_name": username}) or students_collection.find_one({"username": username})
|
94 |
+
user = students_collection.find_one({"$or": [{"full_name": username}, {"username": username}]})
|
95 |
+
elif user_type == "faculty":
|
96 |
+
user = faculty_collection.find_one({"full_name": username})
|
97 |
+
elif user_type == "research_assistant":
|
98 |
+
user = research_assistants_collection.find_one({"full_name": username})
|
99 |
+
elif user_type == "analyst":
|
100 |
+
user = analysts_collection.find_one({"full_name": username})
|
101 |
+
|
102 |
+
if user and check_password_hash(user["password"], password):
|
103 |
+
st.session_state.user_id = user["_id"]
|
104 |
+
print(st.session_state.user_id)
|
105 |
+
st.session_state.authenticated = True
|
106 |
+
st.session_state.user_type = user_type
|
107 |
+
st.session_state.username = username
|
108 |
+
return True
|
109 |
+
return False
|
110 |
+
|
111 |
+
# def login_form():
|
112 |
+
# """Display login form"""
|
113 |
+
# st.title("Welcome to NOVAScholar")
|
114 |
+
|
115 |
+
# with st.form("login_form"):
|
116 |
+
|
117 |
+
# user_type = st.selectbox(
|
118 |
+
# "Please select your Role", ["student", "faculty", "research_assistant", "analyst"]
|
119 |
+
# )
|
120 |
+
# username = st.text_input("Username")
|
121 |
+
# password = st.text_input("Password", type="password")
|
122 |
+
# submit = st.form_submit_button("Login")
|
123 |
+
|
124 |
+
# if submit:
|
125 |
+
# if login_user(username, password, user_type):
|
126 |
+
# st.success("Login successful!")
|
127 |
+
# st.rerun()
|
128 |
+
# else:
|
129 |
+
# st.error("Invalid credentials!")
|
130 |
+
def login_form():
|
131 |
+
"""Display enhanced login form"""
|
132 |
+
st.title("Welcome to NOVAScholar")
|
133 |
+
|
134 |
+
with st.form("login_form"):
|
135 |
+
# Role selection at the top
|
136 |
+
user_type = st.selectbox(
|
137 |
+
"Please select your Role",
|
138 |
+
["student", "faculty", "research_assistant", "analyst"]
|
139 |
+
)
|
140 |
+
|
141 |
+
# Username/email and password stacked vertically
|
142 |
+
username = st.text_input("Username or Email")
|
143 |
+
password = st.text_input("Password", type="password")
|
144 |
+
|
145 |
+
# Login button
|
146 |
+
submit = st.form_submit_button("Login")
|
147 |
+
|
148 |
+
if submit:
|
149 |
+
# Handle both username and email login
|
150 |
+
if '@' in username:
|
151 |
+
username = extract_username(username)
|
152 |
+
|
153 |
+
if login_user(username, password, user_type):
|
154 |
+
st.success("Login successful!")
|
155 |
+
st.rerun()
|
156 |
+
else:
|
157 |
+
st.error("Invalid credentials!")
|
158 |
+
|
159 |
+
def get_courses(username, user_type):
|
160 |
+
if user_type == "student":
|
161 |
+
student = students_collection.find_one({"$or": [{"full_name": username}, {"username": username}]})
|
162 |
+
if student:
|
163 |
+
enrolled_course_ids = [
|
164 |
+
course["course_id"] for course in student.get("enrolled_courses", [])
|
165 |
+
]
|
166 |
+
courses = courses_collection.find(
|
167 |
+
{"course_id": {"$in": enrolled_course_ids}}
|
168 |
+
)
|
169 |
+
# courses += courses_collection2.find(
|
170 |
+
# {"course_id": {"$in": enrolled_course_ids}}
|
171 |
+
# )
|
172 |
+
# # course_titles = [course['title'] for course in courses]
|
173 |
+
# return list(courses)
|
174 |
+
# courses_cursor1 = courses_collection.find(
|
175 |
+
# {"course_id": {"$in": enrolled_course_ids}}
|
176 |
+
# )
|
177 |
+
# courses_cursor2 = courses_collection2.find(
|
178 |
+
# {"course_id": {"$in": enrolled_course_ids}}
|
179 |
+
# )
|
180 |
+
# courses = list(courses_cursor1) + list(courses_cursor2)
|
181 |
+
return list(courses)
|
182 |
+
elif user_type == "faculty":
|
183 |
+
faculty = faculty_collection.find_one({"full_name": username})
|
184 |
+
if faculty:
|
185 |
+
course_ids = [
|
186 |
+
course["course_id"] for course in faculty.get("courses_taught", [])
|
187 |
+
]
|
188 |
+
# courses_1 = list(courses_collection2.find({"course_id": {"$in": course_ids}}))
|
189 |
+
courses_2 = list(courses_collection.find({"course_id": {"$in": course_ids}}))
|
190 |
+
return courses_2
|
191 |
+
elif user_type == "research_assistant":
|
192 |
+
research_assistant = research_assistants_collection.find_one(
|
193 |
+
{"full_name": username}
|
194 |
+
)
|
195 |
+
if research_assistant:
|
196 |
+
course_ids = [
|
197 |
+
course["course_id"]
|
198 |
+
for course in research_assistant.get("courses_assisted", [])
|
199 |
+
]
|
200 |
+
courses = courses_collection2.find({"course_id": {"$in": course_ids}})
|
201 |
+
return list(courses)
|
202 |
+
else:
|
203 |
+
return []
|
204 |
+
|
205 |
+
|
206 |
+
def get_course_ids():
|
207 |
+
"""Get course IDs for sample courses"""
|
208 |
+
return [course["course_id"] for course in SAMPLE_COURSES]
|
209 |
+
|
210 |
+
|
211 |
+
def get_sessions(course_id, course_title):
|
212 |
+
"""Get sessions for a given course ID"""
|
213 |
+
course = courses_collection.find_one({"course_id": course_id, "title": course_title})
|
214 |
+
if course:
|
215 |
+
return course.get("sessions", [])
|
216 |
+
return []
|
217 |
+
|
218 |
+
|
219 |
+
def create_session(new_session, course_id):
|
220 |
+
"""Create a new session for a given course ID"""
|
221 |
+
course = courses_collection2.find_one({"course_id": course_id}) | courses_collection.find_one({"course_id": course_id})
|
222 |
+
if course:
|
223 |
+
last_session_id = max((session["session_id"] for session in course["sessions"]))
|
224 |
+
last_session_id = int(last_session_id[1:])
|
225 |
+
new_session_id = last_session_id + 1
|
226 |
+
new_session["session_id"] = "S" + str(new_session_id)
|
227 |
+
courses_collection2.update_one(
|
228 |
+
{"course_id": new_session["course_id"]},
|
229 |
+
{"$push": {"sessions": new_session}},
|
230 |
+
)
|
231 |
+
return True
|
232 |
+
return False
|
233 |
+
|
234 |
+
|
235 |
+
def create_session_form(course_id):
|
236 |
+
"""Display form to create a new session and perform the creation operation"""
|
237 |
+
st.title("Create New Session")
|
238 |
+
|
239 |
+
if 'session_time' not in st.session_state:
|
240 |
+
st.session_state.session_time = datetime.now().time()
|
241 |
+
if 'show_create_session_form' not in st.session_state:
|
242 |
+
st.session_state.show_create_session_form = False
|
243 |
+
|
244 |
+
with st.form("create_session_form"):
|
245 |
+
session_title = st.text_input("Session Title")
|
246 |
+
session_date = st.date_input("Session Date", date.today(), key="session_date")
|
247 |
+
session_time = st.time_input(
|
248 |
+
"Session Time", st.session_state.session_time, key="session_time"
|
249 |
+
)
|
250 |
+
|
251 |
+
new_session_id = None
|
252 |
+
# Generate new session ID
|
253 |
+
course = courses_collection2.find_one({"course_id": course_id})
|
254 |
+
if course and "sessions" in course and course["sessions"]:
|
255 |
+
last_session_id = max(
|
256 |
+
int(session["session_id"][1:]) for session in course["sessions"]
|
257 |
+
)
|
258 |
+
new_session_id = last_session_id + 1
|
259 |
+
else:
|
260 |
+
new_session_id = 1
|
261 |
+
|
262 |
+
if st.form_submit_button("Create Session"):
|
263 |
+
clicked = True
|
264 |
+
new_session = {
|
265 |
+
"session_id": f"S{new_session_id}",
|
266 |
+
"course_id": course_id,
|
267 |
+
"title": session_title,
|
268 |
+
"date": datetime.combine(session_date, session_time),
|
269 |
+
"status": "upcoming",
|
270 |
+
"created_at": datetime.utcnow(),
|
271 |
+
"pre_class": {
|
272 |
+
"resources": [],
|
273 |
+
"completetion_required": True,
|
274 |
+
},
|
275 |
+
"in_class": {
|
276 |
+
"topics": [],
|
277 |
+
"quiz": {"title": "", "questions": 0, "duration": 0},
|
278 |
+
"polls": [],
|
279 |
+
},
|
280 |
+
"post_class": {
|
281 |
+
"assignments": [],
|
282 |
+
},
|
283 |
+
}
|
284 |
+
courses_collection2.update_one(
|
285 |
+
{"course_id": course_id}, {"$push": {"sessions": new_session}}
|
286 |
+
)
|
287 |
+
st.success("Session created successfully!")
|
288 |
+
st.session_state.show_create_session_form = False
|
289 |
+
|
290 |
+
# new_session_id = None
|
291 |
+
# creation_success = False
|
292 |
+
# # Generate new session ID
|
293 |
+
# course = courses_collection2.find_one({"course_id": course_id})
|
294 |
+
# if course and 'sessions' in course and course['sessions']:
|
295 |
+
# last_session_id = max((session['session_id'] for session in course['sessions']))
|
296 |
+
# last_session_id = int(last_session_id[1:])
|
297 |
+
# new_session_id = last_session_id + 1
|
298 |
+
# else:
|
299 |
+
# new_session_id = 1
|
300 |
+
|
301 |
+
# new_session = {
|
302 |
+
# "session_id": 'S' + new_session_id,
|
303 |
+
# "title": session_title,
|
304 |
+
# "date": datetime.datetime.combine(session_date, session_time).isoformat(),
|
305 |
+
# "status": "upcoming",
|
306 |
+
# "created_at": datetime.datetime.utcnow().isoformat(),
|
307 |
+
# "pre_class": {
|
308 |
+
# "resources": [],
|
309 |
+
# "completetion_required": True,
|
310 |
+
# },
|
311 |
+
# "in_class": {
|
312 |
+
# "topics": [],
|
313 |
+
# "quiz":
|
314 |
+
# {
|
315 |
+
# "title": '',
|
316 |
+
# "questions": 0,
|
317 |
+
# "duration": 0
|
318 |
+
# },
|
319 |
+
# "polls": []
|
320 |
+
# },
|
321 |
+
# "post_class": {
|
322 |
+
# "assignments": [],
|
323 |
+
# }
|
324 |
+
# }
|
325 |
+
# courses_collection2.update_one(
|
326 |
+
# {"course_id": course_id},
|
327 |
+
# {"$push": {"sessions": new_session}}
|
328 |
+
# )
|
329 |
+
# creation_success = True
|
330 |
+
# st.form_submit_button("Create Session")
|
331 |
+
# if creation_success == True:
|
332 |
+
# st.success("Session created successfully!")
|
333 |
+
# else:
|
334 |
+
|
335 |
+
|
336 |
+
def get_new_student_id():
|
337 |
+
"""Generate a new student ID by incrementing the last student ID"""
|
338 |
+
last_student = students_collection.find_one(sort=[("SID", -1)])
|
339 |
+
if last_student:
|
340 |
+
last_student_id = int(last_student["SID"][1:])
|
341 |
+
new_student_id = f"S{last_student_id + 1}"
|
342 |
+
else:
|
343 |
+
new_student_id = "S101"
|
344 |
+
return new_student_id
|
345 |
+
|
346 |
+
|
347 |
+
def get_new_faculty_id():
|
348 |
+
"""Generate a new faculty ID by incrementing the last faculty ID"""
|
349 |
+
last_faculty = faculty_collection.find_one(sort=[("TID", -1)])
|
350 |
+
if last_faculty:
|
351 |
+
last_faculty_id = int(last_faculty["TID"][1:])
|
352 |
+
new_faculty_id = f"T{last_faculty_id + 1}"
|
353 |
+
else:
|
354 |
+
new_faculty_id = "T101"
|
355 |
+
return new_faculty_id
|
356 |
+
|
357 |
+
|
358 |
+
def get_new_course_id():
|
359 |
+
"""Generate a new course ID by incrementing the last course ID"""
|
360 |
+
last_course = courses_collection2.find_one(sort=[("course_id", -1)])
|
361 |
+
if last_course:
|
362 |
+
last_course_id = int(last_course["course_id"][2:])
|
363 |
+
new_course_id = f"CS{last_course_id + 1}"
|
364 |
+
else:
|
365 |
+
new_course_id = "CS101"
|
366 |
+
return new_course_id
|
367 |
+
|
368 |
+
|
369 |
+
# def register_page():
|
370 |
+
# st.title("Register")
|
371 |
+
# if "user_type" not in st.session_state:
|
372 |
+
# st.session_state.user_type = "student"
|
373 |
+
|
374 |
+
# # Select user type
|
375 |
+
# st.session_state.user_type = st.selectbox(
|
376 |
+
# "Select User Type", ["student", "faculty", "research_assistant"]
|
377 |
+
# )
|
378 |
+
# user_type = st.session_state.user_type
|
379 |
+
# print(user_type)
|
380 |
+
|
381 |
+
# with st.form("register_form"):
|
382 |
+
# # user_type = st.selectbox("Select User Type", ["student", "faculty", "research_assistant"])
|
383 |
+
# # print(user_type)
|
384 |
+
# full_name = st.text_input("Full Name")
|
385 |
+
# password = st.text_input("Password", type="password")
|
386 |
+
# confirm_password = st.text_input("Confirm Password", type="password")
|
387 |
+
|
388 |
+
# if user_type == "student":
|
389 |
+
# # Fetch courses for students to select from
|
390 |
+
# courses = list(courses_collection2.find({}, {"course_id": 1, "title": 1}))
|
391 |
+
# course_options = [
|
392 |
+
# f"{course['title']} ({course['course_id']})" for course in courses
|
393 |
+
# ]
|
394 |
+
# selected_courses = st.multiselect("Available Courses", course_options)
|
395 |
+
|
396 |
+
# submit = st.form_submit_button("Register")
|
397 |
+
|
398 |
+
# if submit:
|
399 |
+
# if password == confirm_password:
|
400 |
+
# hashed_password = generate_password_hash(password)
|
401 |
+
# if user_type == "student":
|
402 |
+
# new_student_id = get_new_student_id()
|
403 |
+
# enrolled_courses = [
|
404 |
+
# {
|
405 |
+
# "course_id": course.split("(")[-1][:-1],
|
406 |
+
# "title": course.split(" (")[0],
|
407 |
+
# }
|
408 |
+
# for course in selected_courses
|
409 |
+
# ]
|
410 |
+
# students_collection.insert_one(
|
411 |
+
# {
|
412 |
+
# "SID": new_student_id,
|
413 |
+
# "full_name": full_name,
|
414 |
+
# "password": hashed_password,
|
415 |
+
# "enrolled_courses": enrolled_courses,
|
416 |
+
# "created_at": datetime.utcnow(),
|
417 |
+
# }
|
418 |
+
# )
|
419 |
+
# st.success(
|
420 |
+
# f"Student registered successfully with ID: {new_student_id}"
|
421 |
+
# )
|
422 |
+
# elif user_type == "faculty":
|
423 |
+
# new_faculty_id = get_new_faculty_id()
|
424 |
+
# faculty_collection.insert_one(
|
425 |
+
# {
|
426 |
+
# "TID": new_faculty_id,
|
427 |
+
# "full_name": full_name,
|
428 |
+
# "password": hashed_password,
|
429 |
+
# "courses_taught": [],
|
430 |
+
# "created_at": datetime.utcnow(),
|
431 |
+
# }
|
432 |
+
# )
|
433 |
+
# st.success(
|
434 |
+
# f"Faculty registered successfully with ID: {new_faculty_id}"
|
435 |
+
# )
|
436 |
+
# elif user_type == "research_assistant":
|
437 |
+
# research_assistants_collection.insert_one(
|
438 |
+
# {
|
439 |
+
# "full_name": full_name,
|
440 |
+
# "password": hashed_password,
|
441 |
+
# "created_at": datetime.utcnow(),
|
442 |
+
# }
|
443 |
+
# )
|
444 |
+
# st.success("Research Assistant registered successfully!")
|
445 |
+
# else:
|
446 |
+
# st.error("Passwords do not match")
|
447 |
+
|
448 |
+
|
449 |
+
def get_new_analyst_id():
|
450 |
+
"""Generate a new analyst ID by incrementing the last analyst ID"""
|
451 |
+
last_analyst = analysts_collection.find_one(sort=[("AID", -1)])
|
452 |
+
if last_analyst:
|
453 |
+
last_id = int(last_analyst["AID"][1:])
|
454 |
+
new_id = f"A{last_id + 1}"
|
455 |
+
else:
|
456 |
+
new_id = "A1"
|
457 |
+
return new_id
|
458 |
+
|
459 |
+
|
460 |
+
# def register_page():
|
461 |
+
# st.title("Register")
|
462 |
+
# if "user_type" not in st.session_state:
|
463 |
+
# st.session_state.user_type = "student"
|
464 |
+
|
465 |
+
# # Select user type
|
466 |
+
# st.session_state.user_type = st.selectbox(
|
467 |
+
# "Please select your Role", ["student", "faculty", "research_assistant", "analyst"]
|
468 |
+
# )
|
469 |
+
# user_type = st.session_state.user_type
|
470 |
+
# print(user_type)
|
471 |
+
|
472 |
+
# with st.form("register_form"):
|
473 |
+
|
474 |
+
# full_name = st.text_input("Full Name")
|
475 |
+
# password = st.text_input("Password", type="password")
|
476 |
+
# confirm_password = st.text_input("Confirm Password", type="password")
|
477 |
+
|
478 |
+
# if user_type == "student":
|
479 |
+
# # Fetch courses for students to select from
|
480 |
+
# courses = list(courses_collection.find({}, {"course_id": 1, "title": 1}))
|
481 |
+
# course_options = [
|
482 |
+
# f"{course['title']} ({course['course_id']})" for course in courses
|
483 |
+
# ]
|
484 |
+
# selected_courses = st.multiselect("Available Courses", course_options)
|
485 |
+
|
486 |
+
# submit = st.form_submit_button("Register")
|
487 |
+
|
488 |
+
# if submit:
|
489 |
+
# if password == confirm_password:
|
490 |
+
# hashed_password = generate_password_hash(password)
|
491 |
+
# if user_type == "student":
|
492 |
+
# new_student_id = get_new_student_id()
|
493 |
+
# enrolled_courses = [
|
494 |
+
# {
|
495 |
+
# "course_id": course.split("(")[-1][:-1],
|
496 |
+
# "title": course.split(" (")[0],
|
497 |
+
# }
|
498 |
+
# for course in selected_courses
|
499 |
+
# ]
|
500 |
+
# students_collection.insert_one(
|
501 |
+
# {
|
502 |
+
# "SID": new_student_id,
|
503 |
+
# "full_name": full_name,
|
504 |
+
# "password": hashed_password,
|
505 |
+
# "enrolled_courses": enrolled_courses,
|
506 |
+
# "created_at": datetime.utcnow(),
|
507 |
+
# }
|
508 |
+
# )
|
509 |
+
# st.success(
|
510 |
+
# f"Student registered successfully with ID: {new_student_id}"
|
511 |
+
# )
|
512 |
+
# elif user_type == "faculty":
|
513 |
+
# new_faculty_id = get_new_faculty_id()
|
514 |
+
# faculty_collection.insert_one(
|
515 |
+
# {
|
516 |
+
# "TID": new_faculty_id,
|
517 |
+
# "full_name": full_name,
|
518 |
+
# "password": hashed_password,
|
519 |
+
# "courses_taught": [],
|
520 |
+
# "created_at": datetime.utcnow(),
|
521 |
+
# }
|
522 |
+
# )
|
523 |
+
# st.success(
|
524 |
+
# f"Faculty registered successfully with ID: {new_faculty_id}"
|
525 |
+
# )
|
526 |
+
# elif user_type == "research_assistant":
|
527 |
+
# research_assistants_collection.insert_one(
|
528 |
+
# {
|
529 |
+
# "full_name": full_name,
|
530 |
+
# "password": hashed_password,
|
531 |
+
# "created_at": datetime.utcnow(),
|
532 |
+
# }
|
533 |
+
# )
|
534 |
+
# st.success("Research Assistant registered successfully!")
|
535 |
+
# elif user_type == "analyst":
|
536 |
+
# # new_analyst_id = get_new_analyst_id()
|
537 |
+
# analysts_collection.insert_one(
|
538 |
+
# {
|
539 |
+
# # "AID": new_analyst_id,
|
540 |
+
# "full_name": full_name,
|
541 |
+
# "password": hashed_password,
|
542 |
+
# "created_at": datetime.utcnow(),
|
543 |
+
# }
|
544 |
+
# )
|
545 |
+
# st.success("Analyst registered successfully!")
|
546 |
+
# else:
|
547 |
+
# st.error("Passwords do not match")
|
548 |
+
def register_page():
|
549 |
+
st.title("Register for NOVAScholar")
|
550 |
+
if "user_type" not in st.session_state:
|
551 |
+
st.session_state.user_type = "student"
|
552 |
+
|
553 |
+
# Select user type
|
554 |
+
st.session_state.user_type = st.selectbox(
|
555 |
+
"Please select your Role",
|
556 |
+
["student", "faculty", "research_assistant", "analyst"]
|
557 |
+
)
|
558 |
+
user_type = st.session_state.user_type
|
559 |
+
|
560 |
+
with st.form("register_form"):
|
561 |
+
col1, col2 = st.columns(2)
|
562 |
+
|
563 |
+
with col1:
|
564 |
+
full_name = st.text_input("Full Name")
|
565 |
+
email = st.text_input("Institutional Email")
|
566 |
+
phone = st.text_input("Phone Number")
|
567 |
+
|
568 |
+
with col2:
|
569 |
+
password = st.text_input("Password", type="password")
|
570 |
+
confirm_password = st.text_input("Confirm Password", type="password")
|
571 |
+
|
572 |
+
if user_type == "student":
|
573 |
+
courses = list(courses_collection.find({}, {"course_id": 1, "title": 1}))
|
574 |
+
course_options = [f"{course['title']} ({course['course_id']})" for course in courses]
|
575 |
+
selected_courses = st.multiselect("Available Courses", course_options)
|
576 |
+
|
577 |
+
submit = st.form_submit_button("Register")
|
578 |
+
|
579 |
+
if submit:
|
580 |
+
# Validate email
|
581 |
+
email_valid, email_msg = validate_email(email)
|
582 |
+
if not email_valid:
|
583 |
+
st.error(email_msg)
|
584 |
+
return
|
585 |
+
|
586 |
+
# Validate phone
|
587 |
+
phone_valid, phone_msg = validate_phone(phone)
|
588 |
+
if not phone_valid:
|
589 |
+
st.error(phone_msg)
|
590 |
+
return
|
591 |
+
|
592 |
+
# Validate password match
|
593 |
+
if password != confirm_password:
|
594 |
+
st.error("Passwords do not match")
|
595 |
+
return
|
596 |
+
|
597 |
+
# Extract username from email
|
598 |
+
username = extract_username(email)
|
599 |
+
|
600 |
+
# Check if username already exists
|
601 |
+
if user_type == "student":
|
602 |
+
existing_user = students_collection.find_one({"username": username})
|
603 |
+
elif user_type == "faculty":
|
604 |
+
existing_user = faculty_collection.find_one({"username": username})
|
605 |
+
elif user_type == "research_assistant":
|
606 |
+
existing_user = research_assistants_collection.find_one({"username": username})
|
607 |
+
elif user_type == "analyst":
|
608 |
+
existing_user = analysts_collection.find_one({"username": username})
|
609 |
+
|
610 |
+
if existing_user:
|
611 |
+
st.error("A user with this email already exists")
|
612 |
+
return
|
613 |
+
|
614 |
+
# Hash password and create user
|
615 |
+
hashed_password = generate_password_hash(password)
|
616 |
+
|
617 |
+
user_data = {
|
618 |
+
"username": username,
|
619 |
+
"full_name": full_name,
|
620 |
+
"email": email,
|
621 |
+
"phone": phone,
|
622 |
+
"password": hashed_password,
|
623 |
+
"created_at": datetime.utcnow()
|
624 |
+
}
|
625 |
+
|
626 |
+
if user_type == "student":
|
627 |
+
new_student_id = get_new_student_id()
|
628 |
+
enrolled_courses = [
|
629 |
+
{
|
630 |
+
"course_id": course.split("(")[-1][:-1],
|
631 |
+
"title": course.split(" (")[0],
|
632 |
+
}
|
633 |
+
for course in selected_courses
|
634 |
+
]
|
635 |
+
user_data["SID"] = new_student_id
|
636 |
+
user_data["enrolled_courses"] = enrolled_courses
|
637 |
+
students_collection.insert_one(user_data)
|
638 |
+
st.success(f"Student registered successfully! Your username is: {username}")
|
639 |
+
|
640 |
+
elif user_type == "faculty":
|
641 |
+
new_faculty_id = get_new_faculty_id()
|
642 |
+
user_data["TID"] = new_faculty_id
|
643 |
+
user_data["courses_taught"] = []
|
644 |
+
faculty_collection.insert_one(user_data)
|
645 |
+
st.success(f"Faculty registered successfully! Your username is: {username}")
|
646 |
+
|
647 |
+
elif user_type == "research_assistant":
|
648 |
+
research_assistants_collection.insert_one(user_data)
|
649 |
+
st.success(f"Research Assistant registered successfully! Your username is: {username}")
|
650 |
+
|
651 |
+
elif user_type == "analyst":
|
652 |
+
analysts_collection.insert_one(user_data)
|
653 |
+
st.success(f"Analyst registered successfully! Your username is: {username}")
|
654 |
+
|
655 |
+
# Create Course feature
|
656 |
+
# def create_course_form2(faculty_name, faculty_id):
|
657 |
+
# """Display enhanced form to create a new course with AI-generated content"""
|
658 |
+
# st.title("Create New Course")
|
659 |
+
|
660 |
+
# if 'course_plan' not in st.session_state:
|
661 |
+
# st.session_state.course_plan = None
|
662 |
+
# if 'edit_mode' not in st.session_state:
|
663 |
+
# st.session_state.edit_mode = False
|
664 |
+
|
665 |
+
# # Initial Course Creation Form
|
666 |
+
# if not st.session_state.course_plan:
|
667 |
+
# with st.form("initial_course_form"):
|
668 |
+
# col1, col2 = st.columns(2)
|
669 |
+
# with col1:
|
670 |
+
# course_name = st.text_input("Course Name", placeholder="e.g., Introduction to Computer Science")
|
671 |
+
# faculty_info = st.text_input("Faculty", value=faculty_name, disabled=True)
|
672 |
+
# with col2:
|
673 |
+
# duration_weeks = st.number_input("Duration (weeks)", min_value=1, max_value=16, value=12)
|
674 |
+
# start_date = st.date_input("Start Date")
|
675 |
+
|
676 |
+
# generate_button = st.form_submit_button("Generate Course Structure", use_container_width=True)
|
677 |
+
|
678 |
+
# if generate_button and course_name:
|
679 |
+
# with st.spinner("Generating course structure..."):
|
680 |
+
# try:
|
681 |
+
# course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name)
|
682 |
+
# # print(course_plan)
|
683 |
+
# st.session_state.course_plan = json.loads(course_plan)
|
684 |
+
# st.session_state.start_date = start_date
|
685 |
+
# st.session_state.duration_weeks = duration_weeks
|
686 |
+
# st.rerun()
|
687 |
+
# except Exception as e:
|
688 |
+
# st.error(f"Error generating course structure: {e}")
|
689 |
+
|
690 |
+
# # Display and Edit Generated Course Content
|
691 |
+
# if st.session_state.course_plan:
|
692 |
+
# with st.expander("Course Overview", expanded=True):
|
693 |
+
# if not st.session_state.edit_mode:
|
694 |
+
# st.subheader(st.session_state.course_plan['course_title'])
|
695 |
+
# st.write(st.session_state.course_plan['course_description'])
|
696 |
+
# edit_button = st.button("Edit Course Details", use_container_width=True)
|
697 |
+
# if edit_button:
|
698 |
+
# st.session_state.edit_mode = True
|
699 |
+
# st.rerun()
|
700 |
+
# else:
|
701 |
+
# with st.form("edit_course_details"):
|
702 |
+
# st.session_state.course_plan['course_title'] = st.text_input(
|
703 |
+
# "Course Title",
|
704 |
+
# value=st.session_state.course_plan['course_title']
|
705 |
+
# )
|
706 |
+
# st.session_state.course_plan['course_description'] = st.text_area(
|
707 |
+
# "Course Description",
|
708 |
+
# value=st.session_state.course_plan['course_description']
|
709 |
+
# )
|
710 |
+
# if st.form_submit_button("Save Course Details"):
|
711 |
+
# st.session_state.edit_mode = False
|
712 |
+
# st.rerun()
|
713 |
+
|
714 |
+
# # Display Modules and Sessions
|
715 |
+
# st.subheader("Course Modules and Sessions")
|
716 |
+
|
717 |
+
# start_date = st.session_state.start_date
|
718 |
+
# current_date = start_date
|
719 |
+
|
720 |
+
# all_sessions = []
|
721 |
+
# for module_idx, module in enumerate(st.session_state.course_plan['modules']):
|
722 |
+
# with st.expander(f"📚 Module {module_idx + 1}: {module['module_title']}", expanded=True):
|
723 |
+
# # Edit module title
|
724 |
+
# new_module_title = st.text_input(
|
725 |
+
# f"Module {module_idx + 1} Title",
|
726 |
+
# value=module['module_title'],
|
727 |
+
# key=f"module_{module_idx}"
|
728 |
+
# )
|
729 |
+
# module['module_title'] = new_module_title
|
730 |
+
|
731 |
+
# for sub_idx, sub_module in enumerate(module['sub_modules']):
|
732 |
+
# st.markdown(f"### 📖 {sub_module['title']}")
|
733 |
+
|
734 |
+
# # Create sessions for each topic
|
735 |
+
# for topic_idx, topic in enumerate(sub_module['topics']):
|
736 |
+
# session_key = f"session_{module_idx}_{sub_idx}_{topic_idx}"
|
737 |
+
|
738 |
+
# with st.container():
|
739 |
+
# col1, col2, col3 = st.columns([3, 2, 1])
|
740 |
+
# with col1:
|
741 |
+
# new_topic = st.text_input(
|
742 |
+
# "Topic",
|
743 |
+
# value=topic,
|
744 |
+
# key=f"{session_key}_topic"
|
745 |
+
# )
|
746 |
+
# sub_module['topics'][topic_idx] = new_topic
|
747 |
+
|
748 |
+
# with col2:
|
749 |
+
# session_date = st.date_input(
|
750 |
+
# "Session Date",
|
751 |
+
# value=current_date,
|
752 |
+
# key=f"{session_key}_date"
|
753 |
+
# )
|
754 |
+
|
755 |
+
# with col3:
|
756 |
+
# session_status = st.selectbox(
|
757 |
+
# "Status",
|
758 |
+
# options=["upcoming", "in-progress", "completed"],
|
759 |
+
# key=f"{session_key}_status"
|
760 |
+
# )
|
761 |
+
|
762 |
+
# # Create session object
|
763 |
+
# session = {
|
764 |
+
# "session_id": str(ObjectId()),
|
765 |
+
# "title": new_topic,
|
766 |
+
# "date": datetime.combine(session_date, datetime.min.time()),
|
767 |
+
# "status": session_status,
|
768 |
+
# "module_name": module['module_title'],
|
769 |
+
# "created_at": datetime.utcnow(),
|
770 |
+
# "pre_class": {
|
771 |
+
# "resources": [],
|
772 |
+
# "completion_required": True
|
773 |
+
# },
|
774 |
+
# "in_class": {
|
775 |
+
# "quiz": [],
|
776 |
+
# "polls": []
|
777 |
+
# },
|
778 |
+
# "post_class": {
|
779 |
+
# "assignments": []
|
780 |
+
# }
|
781 |
+
# }
|
782 |
+
# all_sessions.append(session)
|
783 |
+
# current_date = session_date + timedelta(days=7)
|
784 |
+
|
785 |
+
# new_course_id = get_new_course_id()
|
786 |
+
# course_title = st.session_state.course_plan['course_title']
|
787 |
+
# # Final Save Button
|
788 |
+
# if st.button("Save Course", type="primary", use_container_width=True):
|
789 |
+
# try:
|
790 |
+
# course_doc = {
|
791 |
+
# "course_id": new_course_id,
|
792 |
+
# "title": course_title,
|
793 |
+
# "description": st.session_state.course_plan['course_description'],
|
794 |
+
# "faculty": faculty_name,
|
795 |
+
# "faculty_id": faculty_id,
|
796 |
+
# "duration": f"{st.session_state.duration_weeks} weeks",
|
797 |
+
# "start_date": datetime.combine(st.session_state.start_date, datetime.min.time()),
|
798 |
+
# "created_at": datetime.utcnow(),
|
799 |
+
# "sessions": all_sessions
|
800 |
+
# }
|
801 |
+
|
802 |
+
# # Insert into database
|
803 |
+
# courses_collection.insert_one(course_doc)
|
804 |
+
|
805 |
+
# st.success("Course successfully created!")
|
806 |
+
|
807 |
+
# # Update faculty collection
|
808 |
+
# faculty_collection.update_one(
|
809 |
+
# {"_id": st.session_state.user_id},
|
810 |
+
# {
|
811 |
+
# "$push": {
|
812 |
+
# "courses_taught": {
|
813 |
+
# "course_id": new_course_id,
|
814 |
+
# "title": course_title,
|
815 |
+
# }
|
816 |
+
# }
|
817 |
+
# },
|
818 |
+
# )
|
819 |
+
|
820 |
+
# # Clear session state
|
821 |
+
# st.session_state.course_plan = None
|
822 |
+
# st.session_state.edit_mode = False
|
823 |
+
|
824 |
+
# # Optional: Add a button to view the created course
|
825 |
+
# if st.button("View Course"):
|
826 |
+
# # Add navigation logic here
|
827 |
+
# pass
|
828 |
+
|
829 |
+
# except Exception as e:
|
830 |
+
# st.error(f"Error saving course: {e}")
|
831 |
+
|
832 |
+
|
833 |
+
def remove_json_backticks(json_string):
|
834 |
+
"""Remove backticks and 'json' from the JSON object string"""
|
835 |
+
return json_string.replace("```json", "").replace("```", "").strip()
|
836 |
+
|
837 |
+
|
838 |
+
def create_course_form(faculty_name, faculty_id):
|
839 |
+
"""Display enhanced form to create a new course with AI-generated content and resources"""
|
840 |
+
|
841 |
+
st.title("Create New Course")
|
842 |
+
|
843 |
+
if 'course_plan' not in st.session_state:
|
844 |
+
st.session_state.course_plan = None
|
845 |
+
if 'edit_mode' not in st.session_state:
|
846 |
+
st.session_state.edit_mode = False
|
847 |
+
if 'resources_map' not in st.session_state:
|
848 |
+
st.session_state.resources_map = {}
|
849 |
+
if 'start_date' not in st.session_state:
|
850 |
+
st.session_state.start_date = None
|
851 |
+
if 'duration_weeks' not in st.session_state:
|
852 |
+
st.session_state.duration_weeks = None
|
853 |
+
if 'sessions_per_week' not in st.session_state:
|
854 |
+
st.session_state.sessions_per_week = None
|
855 |
+
|
856 |
+
|
857 |
+
# Initial Course Creation Form
|
858 |
+
if not st.session_state.course_plan:
|
859 |
+
with st.form("initial_course_form"):
|
860 |
+
col1, col2 = st.columns(2)
|
861 |
+
with col1:
|
862 |
+
course_name = st.text_input("Course Name", placeholder="e.g., Introduction to Computer Science")
|
863 |
+
faculty_info = st.text_input("Faculty", value=faculty_name, disabled=True)
|
864 |
+
sessions_per_week = st.number_input("Sessions Per Week", min_value=1, max_value=5, value=2)
|
865 |
+
with col2:
|
866 |
+
duration_weeks = st.number_input("Duration (weeks)", min_value=1, max_value=16, value=12)
|
867 |
+
start_date = st.date_input("Start Date")
|
868 |
+
|
869 |
+
generate_button = st.form_submit_button("Generate Course Structure", use_container_width=True)
|
870 |
+
|
871 |
+
if generate_button and course_name:
|
872 |
+
with st.spinner("Generating course structure and resources..."):
|
873 |
+
try:
|
874 |
+
# Generate course plan with resources
|
875 |
+
course_plan = generate_perplexity_response(
|
876 |
+
PERPLEXITY_API_KEY,
|
877 |
+
course_name,
|
878 |
+
duration_weeks,
|
879 |
+
sessions_per_week
|
880 |
+
)
|
881 |
+
try:
|
882 |
+
course_plan_json = json.loads(course_plan)
|
883 |
+
validate_course_plan(course_plan_json)
|
884 |
+
st.session_state.course_plan = course_plan_json
|
885 |
+
except (json.JSONDecodeError, ValueError) as e:
|
886 |
+
st.error(f"Error in course plan structure: {e}")
|
887 |
+
return
|
888 |
+
st.session_state.start_date = start_date
|
889 |
+
st.session_state.duration_weeks = duration_weeks
|
890 |
+
st.session_state.sessions_per_week = sessions_per_week
|
891 |
+
|
892 |
+
# Generate resources for all sessions
|
893 |
+
session_titles = []
|
894 |
+
for module in st.session_state.course_plan['modules']:
|
895 |
+
for sub_module in module['sub_modules']:
|
896 |
+
for topic in sub_module['topics']:
|
897 |
+
# session_titles.append(topic['title'])
|
898 |
+
# session_titles.append(topic)
|
899 |
+
if isinstance(topic, dict):
|
900 |
+
session_titles.append(topic['title'])
|
901 |
+
else:
|
902 |
+
session_titles.append(topic)
|
903 |
+
# In generate_session_resources function, add validation:
|
904 |
+
if not session_titles:
|
905 |
+
return json.dumps({"session_resources": []})
|
906 |
+
resources_response = generate_session_resources(PERPLEXITY_API_KEY, session_titles)
|
907 |
+
without_backticks = remove_json_backticks(resources_response)
|
908 |
+
resources = json.loads(without_backticks)
|
909 |
+
st.session_state.resources_map = {
|
910 |
+
resource['session_title']: resource['resources']
|
911 |
+
for resource in resources['session_resources']
|
912 |
+
}
|
913 |
+
# Add error handling for the resources map
|
914 |
+
# if st.session_state.resources_map is None:
|
915 |
+
# st.session_state.resources_map = {}
|
916 |
+
|
917 |
+
st.rerun()
|
918 |
+
except Exception as e:
|
919 |
+
st.error(f"Error generating course structure: {e}")
|
920 |
+
|
921 |
+
# Display and Edit Generated Course Content
|
922 |
+
if st.session_state.course_plan:
|
923 |
+
with st.expander("Course Overview", expanded=True):
|
924 |
+
if not st.session_state.edit_mode:
|
925 |
+
st.subheader(st.session_state.course_plan['course_title'])
|
926 |
+
st.write(st.session_state.course_plan['course_description'])
|
927 |
+
col1, col2, col3 = st.columns(3)
|
928 |
+
with col1:
|
929 |
+
st.write(f"**Start Date:** {st.session_state.start_date}")
|
930 |
+
with col2:
|
931 |
+
st.write(f"**Duration (weeks):** {st.session_state.duration_weeks}")
|
932 |
+
with col3:
|
933 |
+
st.write(f"**Sessions Per Week:** {st.session_state.sessions_per_week}")
|
934 |
+
|
935 |
+
edit_button = st.button("Edit Course Details", use_container_width=True)
|
936 |
+
if edit_button:
|
937 |
+
st.session_state.edit_mode = True
|
938 |
+
st.rerun()
|
939 |
+
else:
|
940 |
+
with st.form("edit_course_details"):
|
941 |
+
st.session_state.course_plan['course_title'] = st.text_input(
|
942 |
+
"Course Title",
|
943 |
+
value=st.session_state.course_plan['course_title']
|
944 |
+
)
|
945 |
+
st.session_state.course_plan['course_description'] = st.text_area(
|
946 |
+
"Course Description",
|
947 |
+
value=st.session_state.course_plan['course_description']
|
948 |
+
)
|
949 |
+
if st.form_submit_button("Save Course Details"):
|
950 |
+
st.session_state.edit_mode = False
|
951 |
+
st.rerun()
|
952 |
+
|
953 |
+
# Display Modules and Sessions
|
954 |
+
st.subheader("Course Modules and Sessions")
|
955 |
+
|
956 |
+
start_date = st.session_state.start_date
|
957 |
+
current_date = start_date
|
958 |
+
|
959 |
+
all_sessions = []
|
960 |
+
for module_idx, module in enumerate(st.session_state.course_plan['modules']):
|
961 |
+
with st.expander(f"📚 Module {module_idx + 1}: {module['module_title']}", expanded=True):
|
962 |
+
# Edit module title
|
963 |
+
new_module_title = st.text_input(
|
964 |
+
f"Edit Module Title",
|
965 |
+
value=module['module_title'],
|
966 |
+
key=f"module_{module_idx}"
|
967 |
+
)
|
968 |
+
module['module_title'] = new_module_title
|
969 |
+
|
970 |
+
for sub_idx, sub_module in enumerate(module['sub_modules']):
|
971 |
+
st.markdown("<br>", unsafe_allow_html=True) # Add gap between sessions
|
972 |
+
# st.markdown(f"### 📖 {sub_module['title']}")
|
973 |
+
st.markdown(f'<h3 style="font-size: 1.25rem;">📖 Chapter {sub_idx + 1}: {sub_module["title"]}</h3>', unsafe_allow_html=True)
|
974 |
+
# Possible fix:
|
975 |
+
# Inside the loop where topics are being processed:
|
976 |
+
|
977 |
+
for topic_idx, topic in enumerate(sub_module['topics']):
|
978 |
+
st.markdown("<br>", unsafe_allow_html=True) # Add gap between sessions
|
979 |
+
session_key = f"session_{module_idx}_{sub_idx}_{topic_idx}"
|
980 |
+
|
981 |
+
# Get topic title based on type
|
982 |
+
if isinstance(topic, dict):
|
983 |
+
current_topic_title = topic.get('title', '')
|
984 |
+
current_topic_display = current_topic_title
|
985 |
+
else:
|
986 |
+
current_topic_title = str(topic)
|
987 |
+
current_topic_display = current_topic_title
|
988 |
+
|
989 |
+
with st.container():
|
990 |
+
# Session Details
|
991 |
+
col1, col2, col3 = st.columns([3, 2, 1])
|
992 |
+
with col1:
|
993 |
+
new_topic = st.text_input(
|
994 |
+
f"Session {topic_idx + 1} Title",
|
995 |
+
value=current_topic_display,
|
996 |
+
key=f"{session_key}_topic"
|
997 |
+
)
|
998 |
+
# Update the topic in the data structure
|
999 |
+
if isinstance(topic, dict):
|
1000 |
+
topic['title'] = new_topic
|
1001 |
+
else:
|
1002 |
+
sub_module['topics'][topic_idx] = new_topic
|
1003 |
+
|
1004 |
+
with col2:
|
1005 |
+
session_date = st.date_input(
|
1006 |
+
"Session Date",
|
1007 |
+
value=current_date,
|
1008 |
+
key=f"{session_key}_date"
|
1009 |
+
)
|
1010 |
+
|
1011 |
+
with col3:
|
1012 |
+
session_status = st.selectbox(
|
1013 |
+
"Status",
|
1014 |
+
options=["upcoming", "in-progress", "completed"],
|
1015 |
+
key=f"{session_key}_status"
|
1016 |
+
)
|
1017 |
+
|
1018 |
+
# Display Resources
|
1019 |
+
if st.session_state.resources_map:
|
1020 |
+
# Try both the full topic title and the display title
|
1021 |
+
resources = None
|
1022 |
+
if isinstance(topic, dict) and topic.get('title') in st.session_state.resources_map:
|
1023 |
+
resources = st.session_state.resources_map[topic['title']]
|
1024 |
+
elif current_topic_title in st.session_state.resources_map:
|
1025 |
+
resources = st.session_state.resources_map[current_topic_title]
|
1026 |
+
|
1027 |
+
if resources:
|
1028 |
+
with st.container():
|
1029 |
+
# st.markdown("#### 📚 Session Resources")
|
1030 |
+
st.markdown(f'<h4 style="font-size: 1.25rem;">📚 Session Resources</h4>', unsafe_allow_html=True)
|
1031 |
+
# Readings Tab
|
1032 |
+
if resources.get('readings'):
|
1033 |
+
st.markdown(f'<h5 style="font-size: 1.1rem; margin-top: 1rem;">📖 External Resources</h5>', unsafe_allow_html=True)
|
1034 |
+
col1, col2 = st.columns(2)
|
1035 |
+
for idx, reading in enumerate(resources['readings']):
|
1036 |
+
with col1 if idx % 2 == 0 else col2:
|
1037 |
+
st.markdown(f"""
|
1038 |
+
- **{reading['title']}**
|
1039 |
+
- Type: {reading['type']}
|
1040 |
+
- Estimated reading time: {reading['estimated_read_time']}
|
1041 |
+
- [Access Resource]({reading['url']})
|
1042 |
+
""")
|
1043 |
+
|
1044 |
+
# Books Tab and Additional Resources Tab side-by-side
|
1045 |
+
col1, col2 = st.columns(2)
|
1046 |
+
|
1047 |
+
with col1:
|
1048 |
+
if resources.get('books'):
|
1049 |
+
st.markdown(f'<h5 style="font-size: 1.1rem; margin-top: 1rem;">📚 Reference Books</h5>', unsafe_allow_html=True)
|
1050 |
+
for book in resources['books']:
|
1051 |
+
with st.container():
|
1052 |
+
st.markdown(f"""
|
1053 |
+
- **{book['title']}**
|
1054 |
+
- Author: {book['author']}
|
1055 |
+
- ISBN: {book['isbn']}
|
1056 |
+
- Chapters: {book['chapters']}
|
1057 |
+
""")
|
1058 |
+
|
1059 |
+
with col2:
|
1060 |
+
if resources.get('additional_resources'):
|
1061 |
+
st.markdown(f'<h5 style="font-size: 1.1rem; margin-top: 1rem;">🔗 Additional Study Resources</h5>', unsafe_allow_html=True)
|
1062 |
+
for resource in resources['additional_resources']:
|
1063 |
+
with st.container():
|
1064 |
+
st.markdown(f"""
|
1065 |
+
- **{resource['title']}**
|
1066 |
+
- Type: {resource['type']}
|
1067 |
+
- Description: {resource['description']}
|
1068 |
+
- [Access Resource]({resource['url']})
|
1069 |
+
""")
|
1070 |
+
|
1071 |
+
# Create session object
|
1072 |
+
session = {
|
1073 |
+
"session_id": str(ObjectId()),
|
1074 |
+
"title": new_topic,
|
1075 |
+
"date": datetime.combine(session_date, datetime.min.time()),
|
1076 |
+
"status": session_status,
|
1077 |
+
"module_name": module['module_title'],
|
1078 |
+
"created_at": datetime.utcnow(),
|
1079 |
+
"pre_class": {
|
1080 |
+
"resources": [],
|
1081 |
+
"completion_required": True
|
1082 |
+
},
|
1083 |
+
"in_class": {
|
1084 |
+
"quiz": [],
|
1085 |
+
"polls": []
|
1086 |
+
},
|
1087 |
+
"post_class": {
|
1088 |
+
"assignments": []
|
1089 |
+
},
|
1090 |
+
"external_resources": st.session_state.resources_map.get(current_topic_title, {})
|
1091 |
+
}
|
1092 |
+
all_sessions.append(session)
|
1093 |
+
current_date = session_date + timedelta(days=7)
|
1094 |
+
|
1095 |
+
|
1096 |
+
new_course_id = get_new_course_id()
|
1097 |
+
course_title = st.session_state.course_plan['course_title']
|
1098 |
+
|
1099 |
+
# Final Save Button
|
1100 |
+
if st.button("Save Course", type="primary", use_container_width=True):
|
1101 |
+
try:
|
1102 |
+
course_doc = {
|
1103 |
+
"course_id": new_course_id,
|
1104 |
+
"title": course_title,
|
1105 |
+
"description": st.session_state.course_plan['course_description'],
|
1106 |
+
"faculty": faculty_name,
|
1107 |
+
"faculty_id": faculty_id,
|
1108 |
+
"duration": f"{st.session_state.duration_weeks} weeks",
|
1109 |
+
"sessions_per_week": st.session_state.sessions_per_week,
|
1110 |
+
"start_date": datetime.combine(st.session_state.start_date, datetime.min.time()),
|
1111 |
+
"created_at": datetime.utcnow(),
|
1112 |
+
"sessions": all_sessions
|
1113 |
+
}
|
1114 |
+
|
1115 |
+
# Insert into database
|
1116 |
+
courses_collection.insert_one(course_doc)
|
1117 |
+
st.success("Course successfully created!")
|
1118 |
+
|
1119 |
+
# Update faculty collection
|
1120 |
+
faculty_collection.update_one(
|
1121 |
+
{"_id": st.session_state.user_id},
|
1122 |
+
{
|
1123 |
+
"$push": {
|
1124 |
+
"courses_taught": {
|
1125 |
+
"course_id": new_course_id,
|
1126 |
+
"title": course_title,
|
1127 |
+
}
|
1128 |
+
}
|
1129 |
+
}
|
1130 |
+
)
|
1131 |
+
|
1132 |
+
# Clear session state
|
1133 |
+
st.session_state.course_plan = None
|
1134 |
+
st.session_state.edit_mode = False
|
1135 |
+
st.session_state.resources_map = {}
|
1136 |
+
|
1137 |
+
# Optional: Add a button to view the created course
|
1138 |
+
if st.button("View Course"):
|
1139 |
+
# Add navigation logic here
|
1140 |
+
pass
|
1141 |
+
|
1142 |
+
except Exception as e:
|
1143 |
+
st.error(f"Error saving course: {e}")
|
1144 |
+
|
1145 |
+
|
1146 |
+
|
1147 |
+
from research_assistant_dashboard import display_research_assistant_dashboard
from goals2 import display_analyst_dashboard

def enroll_in_course(course_id, course_title, student):
    """Enroll a student in a course"""
    if student:
        courses = student.get("enrolled_courses", [])
        if course_id not in [course["course_id"] for course in courses]:
            course = courses_collection.find_one({"course_id": course_id})
            if course:
                courses.append(
                    {
                        "course_id": course["course_id"],
                        "title": course["title"],
                    }
                )
                students_collection.update_one(
                    {"_id": st.session_state.user_id},
                    {"$set": {"enrolled_courses": courses}},
                )
                st.success(f"Enrolled in course {course_title}")
                # st.experimental_rerun()
            else:
                st.error("Course not found")
        else:
            st.warning("Already enrolled in this course")

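Aside: the update above rewrites the whole enrolled_courses array it just read, which can duplicate or drop entries if two reruns fire together. A minimal sketch of a duplicate-safe, single-query alternative, assuming the same {"course_id", "title"} sub-document shape (not part of this commit; the helper name is illustrative):

def enroll_student_atomically(students_collection, student_id, course_id, course_title):
    # $addToSet appends only when an identical sub-document is not already present
    result = students_collection.update_one(
        {"_id": student_id},
        {"$addToSet": {"enrolled_courses": {"course_id": course_id, "title": course_title}}},
    )
    # modified_count stays 0 when the student was already enrolled (or was not found)
    return result.modified_count == 1
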
# def enroll_in_course_page(course_id):
|
1174 |
+
# """Enroll a student in a course"""
|
1175 |
+
# student = students_collection.find_one({"_id": st.session_state.user_id})
|
1176 |
+
# course_title = courses_collection.find_one({"course_id": course_id})["title"]
|
1177 |
+
|
1178 |
+
# course = courses_collection.find_one({"course_id": course_id})
|
1179 |
+
# if course:
|
1180 |
+
# st.title(course["title"])
|
1181 |
+
# st.subheader("Course Description:")
|
1182 |
+
# st.write(course["description"])
|
1183 |
+
# st.write(f"Faculty: {course['faculty']}")
|
1184 |
+
# st.write(f"Duration: {course['duration']}")
|
1185 |
+
|
1186 |
+
# st.title("Course Sessions")
|
1187 |
+
# for session in course["sessions"]:
|
1188 |
+
# st.write(f"Session: {session['title']}")
|
1189 |
+
# st.write(f"Date: {session['date']}")
|
1190 |
+
# st.write(f"Status: {session['status']}")
|
1191 |
+
# st.write("----")
|
1192 |
+
# else:
|
1193 |
+
# st.error("Course not found")
|
1194 |
+
|
1195 |
+
# enroll_button = st.button("Enroll in Course", key="enroll_button", use_container_width=True)
|
1196 |
+
# if enroll_button:
|
1197 |
+
# enroll_in_course(course_id, course_title, student)
|
1198 |
+
def enroll_in_course_page(course_id):
|
1199 |
+
"""Display an aesthetically pleasing course enrollment page"""
|
1200 |
+
student = students_collection.find_one({"_id": st.session_state.user_id})
|
1201 |
+
course = courses_collection.find_one({"course_id": course_id})
|
1202 |
+
|
1203 |
+
if not course:
|
1204 |
+
st.error("Course not found")
|
1205 |
+
return
|
1206 |
+
|
1207 |
+
# Create two columns for layout
|
1208 |
+
col1, col2 = st.columns([2, 1])
|
1209 |
+
|
1210 |
+
with col1:
|
1211 |
+
# Course header section
|
1212 |
+
st.title(course["title"])
|
1213 |
+
st.markdown(f"*{course['description']}*")
|
1214 |
+
|
1215 |
+
# Course details in an expander
|
1216 |
+
with st.expander("Course Details", expanded=True):
|
1217 |
+
st.markdown(f"👨🏫 **Faculty:** {course['faculty']}")
|
1218 |
+
st.markdown(f"⏱️ **Duration:** {course['duration']}")
|
1219 |
+
|
1220 |
+
# Sessions in a clean card-like format
|
1221 |
+
st.subheader("📚 Course Sessions")
|
1222 |
+
for idx, session in enumerate(course["sessions"], 1):
|
1223 |
+
with st.container():
|
1224 |
+
st.markdown(f"""
|
1225 |
+
---
|
1226 |
+
### Session {idx}: {session['title']}
|
1227 |
+
🗓️ **Date:** {session['date']}
|
1228 |
+
📌 **Status:** {session['status']}
|
1229 |
+
""")
|
1230 |
+
|
1231 |
+
with col2:
|
1232 |
+
with st.container():
|
1233 |
+
st.markdown("### Ready to Learn?")
|
1234 |
+
st.markdown("Click below to enroll in this course")
|
1235 |
+
|
1236 |
+
# Check if already enrolled
|
1237 |
+
courses = student.get("enrolled_courses", [])
|
1238 |
+
is_enrolled = course_id in [c["course_id"] for c in courses]
|
1239 |
+
|
1240 |
+
if is_enrolled:
|
1241 |
+
st.info("✅ You are already enrolled in this course")
|
1242 |
+
else:
|
1243 |
+
enroll_button = st.button(
|
1244 |
+
"🎓 Enroll Now",
|
1245 |
+
key="enroll_button",
|
1246 |
+
use_container_width=True
|
1247 |
+
)
|
1248 |
+
if enroll_button:
|
1249 |
+
enroll_in_course(course_id, course["title"], student)
|
1250 |
+
|
1251 |
+
def show_available_courses(username, user_type, user_id):
|
1252 |
+
"""Display available courses for enrollment"""
|
1253 |
+
st.title("Available Courses")
|
1254 |
+
|
1255 |
+
courses = list(courses_collection.find({}, {"course_id": 1, "title": 1}))
|
1256 |
+
course_options = [
|
1257 |
+
f"{course['title']} ({course['course_id']})" for course in courses
|
1258 |
+
]
|
1259 |
+
|
1260 |
+
selected_course = st.selectbox("Select a Course to Enroll", course_options)
|
1261 |
+
# if selected_courses:
|
1262 |
+
# for course in selected_courses:
|
1263 |
+
# course_id = course.split("(")[-1][:-1]
|
1264 |
+
# course_title = course.split(" (")[0]
|
1265 |
+
# enroll_in_course(course_id, course_title, user_id)
|
1266 |
+
# st.success("Courses enrolled successfully!")
|
1267 |
+
if selected_course:
|
1268 |
+
course_id = selected_course.split("(")[-1][:-1]
|
1269 |
+
enroll_in_course_page(course_id)
|
1270 |
+
|
1271 |
+
def validate_email(email):
    """Validate email format and domain"""
    # Basic email pattern
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    if not re.match(pattern, email):
        return False, "Invalid email format"

    # You can add additional institution-specific validation here
    # For example, checking if the domain is from your institution
    # allowed_domains = ["spit.ac.in"]  # Add more domains as needed
    # domain = email.split('@')[1]
    # if domain not in allowed_domains:
    #     return False, "Please use your institutional email address"

    return True, "Valid email"

def validate_phone(phone):
    """Validate phone number format"""
    # Assuming Indian phone numbers
    pattern = r'^[6-9]\d{9}$'
    if not re.match(pattern, phone):
        return False, "Invalid phone number format. Please enter a 10-digit Indian mobile number"
    return True, "Valid phone number"

def extract_username(email):
    """Extract username from email"""
    return email.split('@')[0]

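The two validators above return a (bool, message) pair rather than raising, so a caller can surface every failure at once. A short usage sketch, assuming validate_email and validate_phone are in scope (the wiring below is illustrative, not the committed registration code):

import streamlit as st

def registration_inputs_valid(email: str, phone: str) -> bool:
    # report both problems in one pass instead of stopping at the first failure
    ok_email, email_msg = validate_email(email)
    ok_phone, phone_msg = validate_phone(phone)
    if not ok_email:
        st.error(email_msg)
    if not ok_phone:
        st.error(phone_msg)
    return ok_email and ok_phone

# e.g. gate the register button on both checks:
# if st.button("Register") and registration_inputs_valid(email_input, phone_input):
#     username = extract_username(email_input)
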
def main_dashboard():
|
1303 |
+
if st.session_state.user_type == "research_assistant":
|
1304 |
+
display_research_assistant_dashboard()
|
1305 |
+
elif st.session_state.user_type == "analyst":
|
1306 |
+
display_analyst_dashboard()
|
1307 |
+
else:
|
1308 |
+
selected_course_id = None
|
1309 |
+
create_session = False
|
1310 |
+
with st.sidebar:
|
1311 |
+
st.title(f"Welcome, {st.session_state.username}")
|
1312 |
+
if st.session_state.user_type == "student":
|
1313 |
+
st.title("Enrolled Courses")
|
1314 |
+
else:
|
1315 |
+
st.title("Your Courses")
|
1316 |
+
|
1317 |
+
# Course selection
|
1318 |
+
enrolled_courses = get_courses(
|
1319 |
+
st.session_state.username, st.session_state.user_type
|
1320 |
+
)
|
1321 |
+
|
1322 |
+
# Enroll in Courses
|
1323 |
+
if st.session_state.user_type == "student":
|
1324 |
+
if st.button(
|
1325 |
+
"Enroll in a New Course", key="enroll_course", use_container_width=True
|
1326 |
+
):
|
1327 |
+
st.session_state.show_enroll_course_page = True
|
1328 |
+
|
1329 |
+
# if st.session_state.show_enroll_course_form:
|
1330 |
+
# courses = list(courses_collection.find({}, {"course_id": 1, "title": 1}))
|
1331 |
+
# courses += list(courses_collection2.find({}, {"course_id": 1, "title": 1}))
|
1332 |
+
# course_options = [f"{course['title']} ({course['course_id']})" for course in courses]
|
1333 |
+
# course_to_enroll = st.selectbox("Available Courses", course_options)
|
1334 |
+
# st.session_state.course_to_enroll = course_to_enroll
|
1335 |
+
|
1336 |
+
if st.session_state.user_type == "faculty":
|
1337 |
+
if st.button(
|
1338 |
+
"Create New Course", key="create_course", use_container_width=True
|
1339 |
+
):
|
1340 |
+
st.session_state.show_create_course_form = True
|
1341 |
+
|
1342 |
+
if not enrolled_courses:
|
1343 |
+
st.warning("No courses found")
|
1344 |
+
else:
|
1345 |
+
course_titles = [course["title"] for course in enrolled_courses]
|
1346 |
+
course_ids = [course["course_id"] for course in enrolled_courses]
|
1347 |
+
|
1348 |
+
selected_course = st.selectbox("Select Course", course_titles)
|
1349 |
+
selected_course_id = course_ids[course_titles.index(selected_course)]
|
1350 |
+
print("Selected Course ID: ", selected_course_id)
|
1351 |
+
|
1352 |
+
st.session_state.selected_course = selected_course
|
1353 |
+
st.session_state.selected_course_id = selected_course_id
|
1354 |
+
|
1355 |
+
# Display course sessions
|
1356 |
+
sessions = get_sessions(selected_course_id, selected_course)
|
1357 |
+
|
1358 |
+
st.title("Course Sessions")
|
1359 |
+
for i, session in enumerate(sessions, start=1):
|
1360 |
+
if st.button(
|
1361 |
+
f"Session {i}", key=f"session_{i}", use_container_width=True
|
1362 |
+
):
|
1363 |
+
st.session_state.selected_session = session
|
1364 |
+
|
1365 |
+
if st.session_state.user_type == "faculty":
|
1366 |
+
# Create new session
|
1367 |
+
# create_session = st.button("Create New Session Button", key="create_session", use_container_width=True)
|
1368 |
+
if st.button(
|
1369 |
+
"Create New Session",
|
1370 |
+
key="create_session",
|
1371 |
+
use_container_width=True,
|
1372 |
+
):
|
1373 |
+
st.session_state.show_create_session_form = True
|
1374 |
+
|
1375 |
+
if st.button("Logout", use_container_width=True):
|
1376 |
+
for key in st.session_state.keys():
|
1377 |
+
del st.session_state[key]
|
1378 |
+
st.rerun()
|
1379 |
+
|
1380 |
+
# if create_session:
|
1381 |
+
# create_session_form(selected_course_id)
|
1382 |
+
if st.session_state.get("show_create_course_form"):
|
1383 |
+
create_course_form(st.session_state.username, st.session_state.user_id)
|
1384 |
+
elif st.session_state.get("show_create_session_form"):
|
1385 |
+
create_session_form(selected_course_id)
|
1386 |
+
elif st.session_state.get("show_enroll_course_page"):
|
1387 |
+
show_available_courses(st.session_state.username, st.session_state.user_type, st.session_state.user_id)
|
1388 |
+
else:
|
1389 |
+
# Main content
|
1390 |
+
if "selected_session" in st.session_state:
|
1391 |
+
display_session_content(
|
1392 |
+
st.session_state.user_id,
|
1393 |
+
selected_course_id,
|
1394 |
+
st.session_state.selected_session,
|
1395 |
+
st.session_state.username,
|
1396 |
+
st.session_state.user_type,
|
1397 |
+
)
|
1398 |
+
else:
|
1399 |
+
st.info("Select a session to view details")
|
1400 |
+
# # Main content
|
1401 |
+
# if 'selected_session' in st.session_state:
|
1402 |
+
# display_session_content(st.session_state.user_id, selected_course_id, st.session_state.selected_session, st.session_state.username, st.session_state.user_type)
|
1403 |
+
# if create_session:
|
1404 |
+
# create_session_form(selected_course_id)
|
1405 |
+
|
1406 |
+
|
1407 |
+
def main():
    st.set_page_config(page_title="NOVAScholar", page_icon="📚", layout="wide")
    init_session_state()
    # modify_courses_collection_schema()

    if not st.session_state.authenticated:
        login_tab, register_tab = st.tabs(["Login", "Register"])

        with register_tab:
            register_page()
        with login_tab:
            login_form()
    else:
        main_dashboard()


if __name__ == "__main__":
    main()

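main() relies on init_session_state(), which is defined earlier in app.py and not shown in this hunk. A plausible minimal initializer, consistent with the flags referenced in main_dashboard() above, is sketched below; this is an assumption, not the committed implementation, which may seed additional keys:

import streamlit as st

def init_session_state_sketch():
    defaults = {
        "authenticated": False,
        "user_id": None,
        "username": None,
        "user_type": None,  # "student", "faculty", "research_assistant", or "analyst"
        "selected_course": None,
        "selected_course_id": None,
        "show_create_course_form": False,
        "show_create_session_form": False,
        "show_enroll_course_page": False,
    }
    for key, value in defaults.items():
        # only seed missing keys so Streamlit reruns do not clobber live state
        if key not in st.session_state:
            st.session_state[key] = value
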
chatbot.py
ADDED
@@ -0,0 +1,67 @@
import streamlit as st
import datetime
from db import courses_collection2, faculty_collection, students_collection, vectors_collection, chat_history_collection
from PIL import Image
from dotenv import load_dotenv
import os
from datetime import datetime
from bson import ObjectId
from file_upload_vectorize import model
from gen_mcqs import generate_mcqs, quizzes_collection

load_dotenv()
MONGO_URI = os.getenv('MONGO_URI')
OPENAI_KEY = os.getenv('OPENAI_KEY')
GEMINI_KEY = os.getenv('GEMINI_KEY')

def insert_chat_message(user_id, session_id, role, content):
    message = {
        "role": role,
        "content": content,
        "timestamp": datetime.utcnow()
    }

    chat_history_collection.update_one(
        {"user_id": ObjectId(user_id), "session_id": session_id},
        {"$push": {"messages": message}, "$set": {"timestamp": datetime.utcnow()}},
        upsert=True
    )

def give_chat_response(user_id, session_id, question, title, description, context):
    context_prompt = f"""
    Based on the following session title, description, and context, answer the user's question in 3-4 lines:

    Title: {title}
    Description: {description}
    Context: {context}

    Question: {question}

    Please provide a clear and concise answer based on the information provided.
    """

    response = model.generate_content(context_prompt)
    if not response or not response.text:
        return "No response received from the model"

    assistant_response = response.text.strip()

    # Save the chat message
    insert_chat_message(user_id, session_id, "assistant", assistant_response)

    return assistant_response

def create_quiz_by_context(user_id, session_id, context, length, session_title, session_description):
    """Create a quiz based on the context provided"""
    quiz = generate_mcqs(context, length, session_title, session_description)
    if not quiz:
        return "No quiz generated"

    # Save the quiz
    quizzes_collection.insert_one({
        "user_id": ObjectId(user_id),
        "session_id": ObjectId(session_id),
        "questions": quiz,
        "timestamp": datetime.utcnow()
    })
    return "Quiz created successfully"
create_course.py
ADDED
@@ -0,0 +1,272 @@
1 |
+
from datetime import datetime, timedelta
|
2 |
+
import os
|
3 |
+
from typing import Dict, List, Any
|
4 |
+
from pymongo import MongoClient
|
5 |
+
import requests
|
6 |
+
import uuid
|
7 |
+
import openai
|
8 |
+
from openai import OpenAI
|
9 |
+
import streamlit as st
|
10 |
+
from bson import ObjectId
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
import json
|
13 |
+
|
14 |
+
load_dotenv()
|
15 |
+
MONGODB_URI = os.getenv("MONGO_URI")
|
16 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
|
17 |
+
OPENAI_API_KEY = os.getenv("OPENAI_KEY")
|
18 |
+
|
19 |
+
client = MongoClient(MONGODB_URI)
|
20 |
+
db = client['novascholar_db']
|
21 |
+
courses_collection = db['courses']
|
22 |
+
|
23 |
+
def generate_perplexity_response(api_key, course_name):
|
24 |
+
headers = {
|
25 |
+
"accept": "application/json",
|
26 |
+
"content-type": "application/json",
|
27 |
+
"authorization": f"Bearer {api_key}"
|
28 |
+
}
|
29 |
+
|
30 |
+
prompt = f"""
|
31 |
+
You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate comprehensive, academically rigorous course structures for undergraduate level education.
|
32 |
+
|
33 |
+
Please generate a detailed course structure for the course {course_name} in JSON format following these specifications:
|
34 |
+
|
35 |
+
1. The course structure should be appropriate for a full semester (14-16 weeks)
|
36 |
+
2. Each module should be designed for 2-4 weeks of instruction
|
37 |
+
3. Follow standard academic practices and nomenclature
|
38 |
+
4. Ensure progressive complexity from foundational to advanced concepts
|
39 |
+
5. The course_title should exactly match the course name provided in the prompt. No additional information should be included in the course_title field.
|
40 |
+
6: Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
|
41 |
+
7. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
|
42 |
+
|
43 |
+
|
44 |
+
The JSON response should follow this structure:
|
45 |
+
{{
|
46 |
+
"course_title": "string",
|
47 |
+
"course_description": "string",
|
48 |
+
"modules": [
|
49 |
+
{{
|
50 |
+
"module_title": "string",
|
51 |
+
"sub_modules": [
|
52 |
+
{{
|
53 |
+
"title": "string",
|
54 |
+
"topics": [string],
|
55 |
+
}}
|
56 |
+
]
|
57 |
+
}}
|
58 |
+
]
|
59 |
+
}}
|
60 |
+
|
61 |
+
Example response:
|
62 |
+
{{
|
63 |
+
"course_title": "Advanced Natural Language Processing",
|
64 |
+
"course_descriptio": "An advanced course covering modern approaches to NLP using deep learning, with focus on transformer architectures and their applications.",
|
65 |
+
"modules": [
|
66 |
+
{{
|
67 |
+
"module_title": "Foundations of Modern NLP",
|
68 |
+
"sub_modules": [
|
69 |
+
{{
|
70 |
+
"title": "Attention Mechanism",
|
71 |
+
"topics": [
|
72 |
+
"Self-attention",
|
73 |
+
"Multi-head attention",
|
74 |
+
"Positional encoding"
|
75 |
+
]
|
76 |
+
}}
|
77 |
+
]
|
78 |
+
}}
|
79 |
+
]
|
80 |
+
}}
|
81 |
+
"""
|
82 |
+
|
83 |
+
messages = [
|
84 |
+
{
|
85 |
+
"role": "system",
|
86 |
+
"content": (
|
87 |
+
"You are an expert educational AI assistant specializing in course design and curriculum planning. "
|
88 |
+
"Your task is to generate accurate, detailed, and structured educational content for undergraduate-level and post-graduate-level courses. "
|
89 |
+
"Provide detailed and accurate information tailored to the user's prompt."
|
90 |
+
"Ensure that the responses are logical, follow standard academic practices, and include realistic concepts relevant to the course."
|
91 |
+
),
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"role": "user",
|
95 |
+
"content": prompt
|
96 |
+
},
|
97 |
+
]
|
98 |
+
try:
|
99 |
+
client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
|
100 |
+
response = client.chat.completions.create(
|
101 |
+
model="llama-3.1-sonar-small-128k-online",
|
102 |
+
messages=messages
|
103 |
+
)
|
104 |
+
content = response.choices[0].message.content
|
105 |
+
return content
|
106 |
+
except Exception as e:
|
107 |
+
st.error(f"Failed to fetch data from Perplexity API: {e}")
|
108 |
+
return ""
|
109 |
+
|
110 |
+
def get_new_course_id():
|
111 |
+
"""Generate a new course ID by incrementing the last course ID"""
|
112 |
+
last_course = courses_collection.find_one(sort=[("course_id", -1)])
|
113 |
+
if last_course:
|
114 |
+
last_course_id = int(last_course["course_id"][2:])
|
115 |
+
new_course_id = f"CS{last_course_id + 1}"
|
116 |
+
else:
|
117 |
+
new_course_id = "CS101"
|
118 |
+
return new_course_id
|
119 |
+
|
120 |
+
|
121 |
+
def create_course(course_name, start_date, duration_weeks):
|
122 |
+
# Generate course overview
|
123 |
+
# overview_prompt = f"""Generate an overview for the undergraduate course {course_name}
|
124 |
+
# Include all relevant concepts and key topics covered in a typical curriculum.
|
125 |
+
# The response should be concise (300-400 words). Ensure that your response is in a valid JSON format."""
|
126 |
+
|
127 |
+
# overview_prompt2 = f"""Generate an overview for the undergraduate course {course_name}.
|
128 |
+
# The overview should include:
|
129 |
+
# The course title, a detailed course description,
|
130 |
+
# a division of all relevant concepts and key topics into 4-6 logical modules,
|
131 |
+
# capturing the flow and structure of a typical curriculum.
|
132 |
+
# Ensure the response adheres to the following JSON format:
|
133 |
+
# {{
|
134 |
+
# 'overview': 'string',
|
135 |
+
# 'modules': [
|
136 |
+
# {{
|
137 |
+
# 'name': 'string',
|
138 |
+
# 'description': 'string'
|
139 |
+
# }}
|
140 |
+
# ]
|
141 |
+
# }}
|
142 |
+
# overview: A detailed description of the course.
|
143 |
+
# modules: An array of 4-6 objects, each representing a logical module with a name and a brief description
|
144 |
+
# **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}"""
|
145 |
+
|
146 |
+
# course_overview = generate_perplexity_response(PERPLEXITY_API_KEY, overview_prompt2)
|
147 |
+
# # print(course_overview)
|
148 |
+
# course_overview_store = course_overview
|
149 |
+
# # print(course_overview_store)
|
150 |
+
# # Generate modules
|
151 |
+
# # modules_prompt = f"Based on this overview: {course_overview}\nCreate 4-6 logical modules for the course, each module should group related concepts and each module may include reference books if applicable"
|
152 |
+
# sub_modules_prompt = f"""Using the provided modules in the overview {course_overview_store}, generate 2-3 submodules for each module.
|
153 |
+
# Each submodule should represent a cohesive subset of the module's topics, logically organized for teaching purposes.
|
154 |
+
# Ensure the response adheres to the following JSON format:
|
155 |
+
# {
|
156 |
+
# 'modules': [
|
157 |
+
# {
|
158 |
+
# 'name': 'string',
|
159 |
+
# 'sub_modules': [
|
160 |
+
# {
|
161 |
+
# 'name': 'string',
|
162 |
+
# 'description': 'string'
|
163 |
+
# }
|
164 |
+
# ]
|
165 |
+
# }
|
166 |
+
# ]
|
167 |
+
# }
|
168 |
+
# modules: An array where each object contains the name of the module and its corresponding sub_modules.
|
169 |
+
# sub_modules: An array of 2-3 objects for each module, each having a name and a brief description."
|
170 |
+
# **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}
|
171 |
+
# """
|
172 |
+
# sub_modules = generate_perplexity_response(PERPLEXITY_API_KEY, sub_modules_prompt)
|
173 |
+
|
174 |
+
# # modules_response = generate_perplexity_response(modules_prompt)
|
175 |
+
# print(sub_modules)
|
176 |
+
|
177 |
+
# total_sessions = duration_weeks * sessions_per_week
|
178 |
+
|
179 |
+
course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name)
|
180 |
+
course_plan_json = json.loads(course_plan)
|
181 |
+
|
182 |
+
# Generate sessions for each module
|
183 |
+
all_sessions = []
|
184 |
+
for module in course_plan_json['modules']:
|
185 |
+
for sub_module in module['sub_modules']:
|
186 |
+
for topic in sub_module['topics']:
|
187 |
+
session = create_session(
|
188 |
+
title=topic,
|
189 |
+
date=start_date,
|
190 |
+
module_name=module['module_title']
|
191 |
+
)
|
192 |
+
# print(session)
|
193 |
+
all_sessions.append(session)
|
194 |
+
start_date += timedelta(days=7) # Next session after a week
|
195 |
+
|
196 |
+
# sample_sessions = [
|
197 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def25'), 'title': 'What is Generative AI?', 'date': datetime(2024, 12, 22, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
198 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def26'), 'title': 'History and Evolution of AI', 'date': datetime(2024, 12, 29, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
199 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def27'), 'title': 'Types of Generative AI (e.g., GANs, VAEs, LLMs)', 'date': datetime(2025, 1, 5, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 505626), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
200 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def28'), 'title': 'Overview of popular GenAI tools (e.g., ChatGPT, Claude, Google Gemini)', 'date': datetime(2025, 1, 12, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 506559), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
201 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def29'), 'title': 'Frameworks for building GenAI models (e.g., TensorFlow, PyTorch)', 'date': datetime(2025, 1, 19, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 506559), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
202 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def2a'), 'title': 'Integration with other AI technologies', 'date': datetime(2025, 1, 26, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 507612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
203 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def2b'), 'title': 'Text-to-text models (e.g., GPT-3, BERT)', 'date': datetime(2025, 2, 2, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 508512), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
204 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def2c'), 'title': 'Text generation for content creation and marketing', 'date': datetime(2025, 2, 9, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 508512), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
205 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def2d'), 'title': 'Chatbots and conversational interfaces', 'date': datetime(2025, 2, 16, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 509612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
206 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def2e'), 'title': 'Generative Adversarial Networks (GANs)', 'date': datetime(2025, 2, 23, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 509612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
207 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def2f'), 'title': 'Variational Autoencoders (VAEs)', 'date': datetime(2025, 3, 2, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 510612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
208 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def30'), 'title': 'Applications in art, design, and media', 'date': datetime(2025, 3, 9, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 511497), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
209 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def31'), 'title': 'Understanding prompt design principles', 'date': datetime(2025, 3, 16, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 511497), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
210 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def33'), 'title': 'Advanced techniques for fine-tuning models', 'date': datetime(2025, 3, 30, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 512514), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
211 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def34'), 'title': 'Ethical implications of AI-generated content', 'date': datetime(2025, 4, 6, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 513613), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
212 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def35'), 'title': 'Addressing bias in AI models', 'date': datetime(2025, 4, 13, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 514639), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
213 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def36'), 'title': 'Regulatory frameworks and guidelines', 'date': datetime(2025, 4, 20, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 514639), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
214 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def37'), 'title': 'Case studies from various industries (e.g., marketing, healthcare, finance)', 'date': datetime(2025, 4, 27, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 515610), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
215 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def38'), 'title': 'Success stories and challenges faced by companies using GenAI', 'date': datetime(2025, 5, 4, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 515610), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
216 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def39'), 'title': 'Guidelines for developing a GenAI project', 'date': datetime(2025, 5, 11, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 516614), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
217 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def3a'), 'title': 'Tools and resources for project implementation', 'date': datetime(2025, 5, 18, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 516614), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
218 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def3b'), 'title': 'Best practices for testing and deployment', 'date': datetime(2025, 5, 25, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 517563), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}}
|
219 |
+
# ]
|
220 |
+
|
221 |
+
# small_sample_sessions = [
|
222 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def25'), 'title': 'What is Generative AI?', 'date': datetime(2024, 12, 22, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
223 |
+
# {'session_id': ObjectId('6767d0bbad8316ac358def26'), 'title': 'History and Evolution of AI', 'date': datetime(2024, 12, 29, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
|
224 |
+
# ]
|
225 |
+
|
226 |
+
|
227 |
+
# print(all_sessions)
|
228 |
+
|
229 |
+
print("Number of sessions:", len(all_sessions))
|
230 |
+
# Create course document
|
231 |
+
# course_description = course_plan_json['course_description']
|
232 |
+
# course_doc = {
|
233 |
+
# "course_id": get_new_course_id(),
|
234 |
+
# "title": course_name,
|
235 |
+
# "description": course_description,
|
236 |
+
# "faculty": faculty_name,
|
237 |
+
# "faculty_id": faculty_id,
|
238 |
+
# "duration": f"{duration_weeks} weeks",
|
239 |
+
# "created_at": datetime.utcnow(),
|
240 |
+
# "sessions": all_sessions
|
241 |
+
# }
|
242 |
+
# try:
|
243 |
+
# courses_collection.insert_one(course_doc)
|
244 |
+
# except Exception as e:
|
245 |
+
# st.error(f"Failed to insert course data into the database: {e}")
|
246 |
+
|
247 |
+
# print(course_plan)
|
248 |
+
|
249 |
+
def create_session(title: str, date: datetime, module_name: str):
|
250 |
+
"""Create a session document with pre-class, in-class, and post-class components."""
|
251 |
+
return {
|
252 |
+
"session_id": ObjectId(),
|
253 |
+
"title": title,
|
254 |
+
"date": date,
|
255 |
+
"status": "upcoming",
|
256 |
+
"created_at": datetime.utcnow(),
|
257 |
+
"pre_class": {
|
258 |
+
"resources": [],
|
259 |
+
"completion_required": True
|
260 |
+
},
|
261 |
+
"in_class": {
|
262 |
+
"quiz": [],
|
263 |
+
"polls": []
|
264 |
+
},
|
265 |
+
"post_class": {
|
266 |
+
"assignments": []
|
267 |
+
}
|
268 |
+
}
|
269 |
+
|
270 |
+
# Usage example:
|
271 |
+
if __name__ == "__main__":
|
272 |
+
create_course("Introduction to Data Analytics", datetime.now(), 2)
|
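create_course() feeds the Perplexity reply straight into json.loads, and the prompt has to insist that the model omit backticks. A small defensive wrapper (a sketch, not part of the committed file) also tolerates a fenced reply:

import json

def parse_course_plan(raw: str) -> dict:
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned.split("\n", 1)[-1]  # drop the opening fence line
        if cleaned.rstrip().endswith("```"):
            cleaned = cleaned.rstrip()[:-3]   # drop the closing fence
    return json.loads(cleaned)

# course_plan_json = parse_course_plan(generate_perplexity_response(PERPLEXITY_API_KEY, course_name))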
create_course2.py
ADDED
@@ -0,0 +1,331 @@
1 |
+
from datetime import datetime, timedelta
|
2 |
+
import os
|
3 |
+
from typing import Dict, List, Any
|
4 |
+
from pymongo import MongoClient
|
5 |
+
import requests
|
6 |
+
import uuid
|
7 |
+
import openai
|
8 |
+
from openai import OpenAI
|
9 |
+
import streamlit as st
|
10 |
+
from bson import ObjectId
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
import json
|
13 |
+
|
14 |
+
load_dotenv()
|
15 |
+
MONGODB_URI = os.getenv("MONGO_URI")
|
16 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
|
17 |
+
OPENAI_API_KEY = os.getenv("OPENAI_KEY")
|
18 |
+
|
19 |
+
client = MongoClient(MONGODB_URI)
|
20 |
+
db = client['novascholar_db']
|
21 |
+
courses_collection = db['courses']
|
22 |
+
|
23 |
+
def generate_perplexity_response(api_key, course_name, duration_weeks, sessions_per_week):
|
24 |
+
headers = {
|
25 |
+
"accept": "application/json",
|
26 |
+
"content-type": "application/json",
|
27 |
+
"authorization": f"Bearer {api_key}"
|
28 |
+
}
|
29 |
+
|
30 |
+
# Calculate sessions based on duration
|
31 |
+
total_sessions = duration_weeks * sessions_per_week # Assuming 2 sessions per week
|
32 |
+
|
33 |
+
prompt = f"""
|
34 |
+
You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate a comprehensive, academically rigorous course structure for the course {course_name} that fits exactly within {duration_weeks} weeks with {total_sessions} total sessions ({sessions_per_week} sessions per week).
|
35 |
+
|
36 |
+
Please generate a detailed course structure in JSON format following these specifications:
|
37 |
+
|
38 |
+
1. The course structure must be designed for exactly {duration_weeks} weeks with {total_sessions} total sessions
|
39 |
+
2. Each module should contain an appropriate number of sessions that sum up to exactly {total_sessions}
|
40 |
+
3. Each session should be designed for a 1-1.5-hour class duration
|
41 |
+
4. Follow standard academic practices and nomenclature
|
42 |
+
5. Ensure progressive complexity from foundational to advanced concepts
|
43 |
+
6. The course_title should exactly match the course name provided
|
44 |
+
7. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
|
45 |
+
8. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
|
46 |
+
|
47 |
+
The JSON response should follow this structure:
|
48 |
+
{{
|
49 |
+
"course_title": "string",
|
50 |
+
"course_description": "string",
|
51 |
+
"total_duration_weeks": {duration_weeks},
|
52 |
+
"sessions_per_week": {sessions_per_week},
|
53 |
+
"total_sessions": {total_sessions},
|
54 |
+
"modules": [
|
55 |
+
{{
|
56 |
+
"module_title": "string",
|
57 |
+
"module_duration_sessions": number,
|
58 |
+
"sub_modules": [
|
59 |
+
{{
|
60 |
+
"title": "string",
|
61 |
+
"topics": [
|
62 |
+
{{
|
63 |
+
"title": "string",
|
64 |
+
"short_description": "string",
|
65 |
+
"concise_learning_objectives": ["string"]
|
66 |
+
}}
|
67 |
+
]
|
68 |
+
}}
|
69 |
+
]
|
70 |
+
}}
|
71 |
+
]
|
72 |
+
}}
|
73 |
+
|
74 |
+
Ensure that:
|
75 |
+
1. The sum of all module_duration_sessions equals exactly {total_sessions}
|
76 |
+
2. Each topic has clear learning objectives
|
77 |
+
3. Topics build upon each other logically
|
78 |
+
4. Content is distributed evenly across the available sessions
|
79 |
+
5. **This Instruction is Strictly followed: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.****
|
80 |
+
|
81 |
+
"""
|
82 |
+
|
83 |
+
messages = [
|
84 |
+
{
|
85 |
+
"role": "system",
|
86 |
+
"content": (
|
87 |
+
"You are an expert educational AI assistant specializing in course design and curriculum planning. "
|
88 |
+
"Your task is to generate accurate, detailed, and structured educational content that precisely fits "
|
89 |
+
"the specified duration."
|
90 |
+
),
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"role": "user",
|
94 |
+
"content": prompt
|
95 |
+
},
|
96 |
+
]
|
97 |
+
|
98 |
+
try:
|
99 |
+
client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
|
100 |
+
response = client.chat.completions.create(
|
101 |
+
model="llama-3.1-sonar-small-128k-online",
|
102 |
+
messages=messages
|
103 |
+
)
|
104 |
+
content = response.choices[0].message.content
|
105 |
+
|
106 |
+
# Validate session count
|
107 |
+
course_plan = json.loads(content)
|
108 |
+
total_planned_sessions = sum(
|
109 |
+
module.get('module_duration_sessions', 0)
|
110 |
+
for module in course_plan.get('modules', [])
|
111 |
+
)
|
112 |
+
|
113 |
+
if abs(total_planned_sessions - total_sessions) > 5:
|
114 |
+
raise ValueError(f"Generated plan has {total_planned_sessions} sessions, but {total_sessions} were requested")
|
115 |
+
|
116 |
+
return content
|
117 |
+
except Exception as e:
|
118 |
+
st.error(f"Failed to fetch data from Perplexity API: {e}")
|
119 |
+
return ""
|
120 |
+
|
121 |
+
def generate_session_resources(api_key, session_titles: List[str]):
|
122 |
+
"""
|
123 |
+
Generate relevant resources for each session title separately
|
124 |
+
"""
|
125 |
+
resources_prompt = f"""
|
126 |
+
You are an expert educational content curator. For each session title provided, suggest highly relevant and accurate learning resources.
|
127 |
+
Please provide resources for these sessions: {session_titles}
|
128 |
+
|
129 |
+
For each session, provide resources in this JSON format:
|
130 |
+
{{
|
131 |
+
"session_resources": [
|
132 |
+
{{
|
133 |
+
"session_title": "string",
|
134 |
+
"resources": {{
|
135 |
+
"readings": [
|
136 |
+
{{
|
137 |
+
"title": "string",
|
138 |
+
"url": "string",
|
139 |
+
"type": "string",
|
140 |
+
"estimated_read_time": "string"
|
141 |
+
}}
|
142 |
+
],
|
143 |
+
"books": [
|
144 |
+
{{
|
145 |
+
"title": "string",
|
146 |
+
"author": "string",
|
147 |
+
"isbn": "string",
|
148 |
+
"chapters": "string"
|
149 |
+
}}
|
150 |
+
],
|
151 |
+
"additional_resources": [
|
152 |
+
{{
|
153 |
+
"title": "string",
|
154 |
+
"url": "string",
|
155 |
+
"type": "string",
|
156 |
+
"description": "string"
|
157 |
+
}}
|
158 |
+
]
|
159 |
+
}}
|
160 |
+
}}
|
161 |
+
]
|
162 |
+
}}
|
163 |
+
|
164 |
+
Guidelines:
|
165 |
+
1. Ensure all URLs are real and currently active
|
166 |
+
2. Prioritize high-quality, authoritative sources
|
167 |
+
3. Include 1-2 resources of each type
|
168 |
+
4. For readings, include a mix of academic and practical resources; this may extend to 3-4 readings
|
169 |
+
5. Book references should be real, recently published works
|
170 |
+
6. Additional resources can include tools, documentation, or practice platforms
|
171 |
+
7. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
|
172 |
+
8. ***NOTE: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
|
173 |
+
"""
|
174 |
+
|
175 |
+
messages = [
|
176 |
+
{
|
177 |
+
"role": "system",
|
178 |
+
"content": "You are an expert educational content curator, focused on providing accurate and relevant learning resources.",
|
179 |
+
},
|
180 |
+
{
|
181 |
+
"role": "user",
|
182 |
+
"content": resources_prompt
|
183 |
+
},
|
184 |
+
]
|
185 |
+
|
186 |
+
try:
|
187 |
+
client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
|
188 |
+
response = client.chat.completions.create(
|
189 |
+
model="llama-3.1-sonar-small-128k-online",
|
190 |
+
messages=messages
|
191 |
+
)
|
192 |
+
print("Response is: \n", response.choices[0].message.content)
|
193 |
+
# try:
|
194 |
+
# return json.loads(response.choices[0].message.content)
|
195 |
+
# except json.JSONDecodeError as e:
|
196 |
+
# st.error(f"Failed to decode JSON response: {e}")
|
197 |
+
# return None
|
198 |
+
return response.choices[0].message.content
|
199 |
+
except Exception as e:
|
200 |
+
st.error(f"Failed to generate resources: {e}")
|
201 |
+
return None
|
202 |
+
|
203 |
+
def validate_course_plan(course_plan):
|
204 |
+
required_fields = ['course_title', 'course_description', 'modules']
|
205 |
+
if not all(field in course_plan for field in required_fields):
|
206 |
+
raise ValueError("Invalid course plan structure")
|
207 |
+
|
208 |
+
for module in course_plan['modules']:
|
209 |
+
if 'module_title' not in module or 'sub_modules' not in module:
|
210 |
+
raise ValueError("Invalid module structure")
|
211 |
+
|
212 |
+
def create_session(title: str, date: datetime, module_name: str, resources: dict):
|
213 |
+
"""Create a session document with pre-class, in-class, and post-class components."""
|
214 |
+
return {
|
215 |
+
"session_id": ObjectId(),
|
216 |
+
"title": title,
|
217 |
+
"date": date,
|
218 |
+
"status": "upcoming",
|
219 |
+
"created_at": datetime.utcnow(),
|
220 |
+
"module_name": module_name,
|
221 |
+
"pre_class": {
|
222 |
+
"resources": [],
|
223 |
+
"completion_required": True
|
224 |
+
},
|
225 |
+
"in_class": {
|
226 |
+
"quiz": [],
|
227 |
+
"polls": []
|
228 |
+
},
|
229 |
+
"post_class": {
|
230 |
+
"assignments": []
|
231 |
+
},
|
232 |
+
"external_resources": {
|
233 |
+
"readings": resources.get("readings", []),
|
234 |
+
"books": resources.get("books", []),
|
235 |
+
"additional_resources": resources.get("additional_resources", [])
|
236 |
+
}
|
237 |
+
}
|
238 |
+
|
239 |
+
def create_course(course_name: str, start_date: datetime, duration_weeks: int, sessions_per_week: int):
|
240 |
+
# First generate a course plan using Perplexity API
|
241 |
+
# course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name, duration_weeks, sessions_per_week)
|
242 |
+
# course_plan_json = json.loads(course_plan)
|
243 |
+
|
244 |
+
# print("Course Structure is: \n", course_plan_json);
|
245 |
+
|
246 |
+
# Earlier Code:
|
247 |
+
# Generate sessions for each module with resources
|
248 |
+
# all_sessions = []
|
249 |
+
# current_date = start_date
|
250 |
+
|
251 |
+
# for module in course_plan_json['modules']:
|
252 |
+
# for sub_module in module['sub_modules']:
|
253 |
+
# for topic in sub_module['topics']:
|
254 |
+
# session = create_session(
|
255 |
+
# title=topic['title'],
|
256 |
+
# date=current_date,
|
257 |
+
# module_name=module['module_title'],
|
258 |
+
# resources=topic['resources']
|
259 |
+
# )
|
260 |
+
# all_sessions.append(session)
|
261 |
+
# current_date += timedelta(days=3.5) # Spacing sessions evenly across the week
|
262 |
+
|
263 |
+
# return course_plan_json, all_sessions
|
264 |
+
|
265 |
+
# New Code:
|
266 |
+
# Extract all session titles
|
267 |
+
session_titles = []
|
268 |
+
# Load the course plan JSON
|
269 |
+
course_plan_json = {}
|
270 |
+
with open('sample_files/sample_course.json', 'r') as file:
|
271 |
+
course_plan_json = json.load(file)
|
272 |
+
|
273 |
+
for module in course_plan_json['modules']:
|
274 |
+
for sub_module in module['sub_modules']:
|
275 |
+
for topic in sub_module['topics']:
|
276 |
+
session_titles.append(topic['title'])
|
277 |
+
|
278 |
+
# Generate resources for all sessions
|
279 |
+
session_resources = generate_session_resources(PERPLEXITY_API_KEY, session_titles)
|
280 |
+
# print("Session Resources are: \n", session_resources)
|
281 |
+
resources = json.loads(session_resources)
|
282 |
+
# print("Resources JSON is: \n", resources_json)
|
283 |
+
|
284 |
+
# print("Session Resources are: \n", session_resources)
|
285 |
+
|
286 |
+
# Create a mapping of session titles to their resources
|
287 |
+
|
288 |
+
# Import Resources JSON
|
289 |
+
# resources = {}
|
290 |
+
# with open('sample_files/sample_course_resources.json', 'r') as file:
|
291 |
+
# resources = json.load(file)
|
292 |
+
|
293 |
+
resources_map = {
|
294 |
+
resource['session_title']: resource['resources']
|
295 |
+
for resource in resources['session_resources']
|
296 |
+
}
|
297 |
+
print("Resources Map is: \n", resources_map)
|
298 |
+
# print("Sample is: ", resources_map.get('Overview of ML Concepts, History, and Applications'));
|
299 |
+
# Generate sessions with their corresponding resources
|
300 |
+
all_sessions = []
|
301 |
+
current_date = start_date
|
302 |
+
|
303 |
+
for module in course_plan_json['modules']:
|
304 |
+
for sub_module in module['sub_modules']:
|
305 |
+
for topic in sub_module['topics']:
|
306 |
+
session = create_session(
|
307 |
+
title=topic['title'],
|
308 |
+
date=current_date,
|
309 |
+
module_name=module['module_title'],
|
310 |
+
resources=resources_map.get(topic['title'], {})
|
311 |
+
)
|
312 |
+
all_sessions.append(session)
|
313 |
+
current_date += timedelta(days=3.5)
|
314 |
+
|
315 |
+
print("All Sessions are: \n", all_sessions)
|
316 |
+
|
317 |
+
def get_new_course_id():
|
318 |
+
"""Generate a new course ID by incrementing the last course ID"""
|
319 |
+
last_course = courses_collection.find_one(sort=[("course_id", -1)])
|
320 |
+
if last_course:
|
321 |
+
last_course_id = int(last_course["course_id"][2:])
|
322 |
+
new_course_id = f"CS{last_course_id + 1}"
|
323 |
+
else:
|
324 |
+
new_course_id = "CS101"
|
325 |
+
return new_course_id
|
326 |
+
|
327 |
+
# if __name__ == "__main__":
|
328 |
+
# course_name = "Introduction to Machine Learning"
|
329 |
+
# start_date = datetime(2022, 9, 1)
|
330 |
+
# duration_weeks = 4
|
331 |
+
# create_course(course_name, start_date, duration_weeks, 3)
|
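get_new_course_id() in both create_course modules picks the "last" course by sorting course_id strings lexicographically, so "CS999" would still sort above "CS1000" once the ids grow a digit. A numerically safe variant (a sketch under that assumption, not the committed implementation):

import re

def next_course_id(courses_collection, prefix="CS", start=101):
    # compare the numeric suffix instead of the raw string, so CS1000 > CS999
    max_num = None
    for doc in courses_collection.find({}, {"course_id": 1}):
        match = re.fullmatch(rf"{prefix}(\d+)", str(doc.get("course_id", "")))
        if match:
            num = int(match.group(1))
            max_num = num if max_num is None else max(max_num, num)
    return f"{prefix}{start}" if max_num is None else f"{prefix}{max_num + 1}"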
db.py
ADDED
@@ -0,0 +1,696 @@
1 |
+
# Setup for MongoDB
|
2 |
+
from pymongo import MongoClient
|
3 |
+
from datetime import datetime
|
4 |
+
from werkzeug.security import generate_password_hash
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
load_dotenv()
|
9 |
+
MONGO_URI = os.getenv("MONGO_URI")
|
10 |
+
|
11 |
+
client = MongoClient(MONGO_URI)
|
12 |
+
try:
|
13 |
+
client.admin.command("ping")
|
14 |
+
print("MongoDB connection successful")
|
15 |
+
except Exception as e:
|
16 |
+
print(f"MongoDB connection failed: {e}")
|
17 |
+
|
18 |
+
db = client["novascholar_db"]
|
19 |
+
|
20 |
+
########
|
21 |
+
# Research Assistant Schema
|
22 |
+
research_assistant_schema = {
|
23 |
+
"bsonType": "object",
|
24 |
+
"required": ["full_name", "password", "email", "courses_assisted"],
|
25 |
+
"properties": {
|
26 |
+
"full_name": {
|
27 |
+
"bsonType": "string",
|
28 |
+
"description": "Full name of the research assistant",
|
29 |
+
},
|
30 |
+
"password": {
|
31 |
+
"bsonType": "string",
|
32 |
+
"description": "Hashed password of the research assistant",
|
33 |
+
},
|
34 |
+
"email": {
|
35 |
+
"bsonType": "string",
|
36 |
+
"description": "Email address of the research assistant",
|
37 |
+
},
|
38 |
+
"courses_assisted": {
|
39 |
+
"bsonType": "array",
|
40 |
+
"description": "List of courses the research assistant is assisting",
|
41 |
+
"items": {
|
42 |
+
"bsonType": "object",
|
43 |
+
"required": ["course_id"],
|
44 |
+
"properties": {
|
45 |
+
"course_id": {
|
46 |
+
"bsonType": "string",
|
47 |
+
"description": "ID of the course",
|
48 |
+
}
|
49 |
+
},
|
50 |
+
},
|
51 |
+
},
|
52 |
+
},
|
53 |
+
}
|
54 |
+
|
55 |
+
# Create research assistants collection
|
56 |
+
research_assistants_collection = db["research_assistants"]
|
57 |
+
|
58 |
+
# Create indexes
|
59 |
+
research_assistants_collection.create_index("full_name", unique=True)
|
60 |
+
research_assistants_collection.create_index("email", unique=True)
|
61 |
+
|
62 |
+
|
63 |
+
# Optional: Sample data insertion function
|
64 |
+
def insert_sample_research_assistants():
|
65 |
+
sample_research_assistants = [
|
66 |
+
{
|
67 |
+
"full_name": "John Doe RA",
|
68 |
+
"password": generate_password_hash("password123"),
|
69 |
+
"email": "[email protected]",
|
70 |
+
"courses_assisted": [{"course_id": "CS101"}, {"course_id": "CS102"}],
|
71 |
+
}
|
72 |
+
]
|
73 |
+
|
74 |
+
try:
|
75 |
+
research_assistants_collection.insert_many(sample_research_assistants)
|
76 |
+
print("Sample research assistants inserted successfully!")
|
77 |
+
except Exception as e:
|
78 |
+
print(f"Error inserting sample research assistants: {e}")
|
79 |
+
|
80 |
+
|
81 |
+
###########
|
82 |
+
|
83 |
+
###############
|
84 |
+
# Add after research assistant schema
|
85 |
+
|
86 |
+
# Analyst Schema
|
87 |
+
analyst_schema = {
|
88 |
+
"bsonType": "object",
|
89 |
+
"required": ["full_name", "password", "email", "courses_analyzed"],
|
90 |
+
"properties": {
|
91 |
+
"full_name": {"bsonType": "string", "description": "Full name of the analyst"},
|
92 |
+
"password": {
|
93 |
+
"bsonType": "string",
|
94 |
+
"description": "Hashed password of the analyst",
|
95 |
+
},
|
96 |
+
"email": {"bsonType": "string", "description": "Email address of the analyst"},
|
97 |
+
"courses_analyzed": {
|
98 |
+
"bsonType": "array",
|
99 |
+
"description": "List of courses the analyst is analyzing",
|
100 |
+
"items": {
|
101 |
+
"bsonType": "object",
|
102 |
+
"required": ["course_id"],
|
103 |
+
"properties": {
|
104 |
+
"course_id": {
|
105 |
+
"bsonType": "string",
|
106 |
+
"description": "ID of the course",
|
107 |
+
}
|
108 |
+
},
|
109 |
+
},
|
110 |
+
},
|
111 |
+
},
|
112 |
+
}
|
113 |
+
|
114 |
+
# Create analysts collection
|
115 |
+
analysts_collection = db["analysts"]
|
116 |
+
|
117 |
+
# Create indexes for analysts
|
118 |
+
analysts_collection.create_index("full_name", unique=True)
|
119 |
+
analysts_collection.create_index("email", unique=True)
|
120 |
+
|
121 |
+
|
122 |
+
def insert_sample_analysts():
|
123 |
+
sample_analysts = [
|
124 |
+
{
|
125 |
+
"full_name": "jane",
|
126 |
+
"password": generate_password_hash("jane"),
|
127 |
+
"email": "[email protected]",
|
128 |
+
"courses_analyzed": [{"course_id": "CS101"}, {"course_id": "CS102"}],
|
129 |
+
}
|
130 |
+
]
|
131 |
+
|
132 |
+
try:
|
133 |
+
analysts_collection.insert_many(sample_analysts)
|
134 |
+
print("Sample analysts inserted successfully!")
|
135 |
+
except Exception as e:
|
136 |
+
print(f"Error inserting sample analysts: {e}")
|
137 |
+
|
138 |
+
|
139 |
+
##############@
|
140 |
+
|
141 |
+
|
142 |
+
# Define the course schema
|
143 |
+
course_schema = {
|
144 |
+
"bsonType": "object",
|
145 |
+
"required": [
|
146 |
+
"course_id",
|
147 |
+
"title",
|
148 |
+
"description",
|
149 |
+
"faculty",
|
150 |
+
"faculty_id",
|
151 |
+
"duration",
|
152 |
+
"created_at",
|
153 |
+
],
|
154 |
+
"properties": {
|
155 |
+
"course_id": {
|
156 |
+
"bsonType": "string",
|
157 |
+
"description": "Unique identifier for the course",
|
158 |
+
},
|
159 |
+
"title": {"bsonType": "string", "description": "Title of the course"},
|
160 |
+
"description": {
|
161 |
+
"bsonType": "string",
|
162 |
+
"description": "Description of the course",
|
163 |
+
},
|
164 |
+
"faculty": {"bsonType": "string", "description": "Name of the faculty"},
|
165 |
+
"duration": {"bsonType": "string", "description": "Duration of the course"},
|
166 |
+
"created_at": {
|
167 |
+
"bsonType": "date",
|
168 |
+
"description": "Date when the course was created",
|
169 |
+
},
|
170 |
+
"sessions": {
|
171 |
+
"bsonType": "array",
|
172 |
+
"description": "List of sessions associated with the course",
|
173 |
+
"items": {
|
174 |
+
"bsonType": "object",
|
175 |
+
"required": ["session_id", "title", "date", "status", "created_at"],
|
176 |
+
"properties": {
|
177 |
+
"session_id": {
|
178 |
+
"bsonType": "string",
|
179 |
+
"description": "Unique identifier for the session",
|
180 |
+
},
|
181 |
+
"title": {
|
182 |
+
"bsonType": "string",
|
183 |
+
"description": "Title of the session",
|
184 |
+
},
|
185 |
+
"date": {"bsonType": "date", "description": "Date of the session"},
|
186 |
+
"status": {
|
187 |
+
"bsonType": "string",
|
188 |
+
"description": "Status of the session (e.g., completed, upcoming)",
|
189 |
+
},
|
190 |
+
"created_at": {
|
191 |
+
"bsonType": "date",
|
192 |
+
"description": "Date when the session was created",
|
193 |
+
},
|
194 |
+
"pre_class": {
|
195 |
+
"bsonType": "object",
|
196 |
+
"description": "Pre-class segment data",
|
197 |
+
"properties": {
|
198 |
+
"resources": {
|
199 |
+
"bsonType": "array",
|
200 |
+
"description": "List of pre-class resources",
|
201 |
+
"items": {
|
202 |
+
"bsonType": "object",
|
203 |
+
"required": ["type", "title", "url"],
|
204 |
+
"properties": {
|
205 |
+
"type": {
|
206 |
+
"bsonType": "string",
|
207 |
+
"description": "Type of resource (e.g., pdf, video)",
|
208 |
+
},
|
209 |
+
"title": {
|
210 |
+
"bsonType": "string",
|
211 |
+
"description": "Title of the resource",
|
212 |
+
},
|
213 |
+
"url": {
|
214 |
+
"bsonType": "string",
|
215 |
+
"description": "URL of the resource",
|
216 |
+
},
|
217 |
+
"vector": {
|
218 |
+
"bsonType": "array",
|
219 |
+
"description": "Vector representation of the resource",
|
220 |
+
"items": {"bsonType": "double"},
|
221 |
+
},
|
222 |
+
},
|
223 |
+
},
|
224 |
+
},
|
225 |
+
"completion_required": {
|
226 |
+
"bsonType": "bool",
|
227 |
+
"description": "Indicates if completion of pre-class resources is required",
|
228 |
+
},
|
229 |
+
},
|
230 |
+
},
|
231 |
+
"in_class": {
|
232 |
+
"bsonType": "object",
|
233 |
+
"description": "In-class segment data",
|
234 |
+
"properties": {
|
235 |
+
"topics": {
|
236 |
+
"bsonType": "array",
|
237 |
+
"description": "List of topics covered in the session",
|
238 |
+
"items": {"bsonType": "string"},
|
239 |
+
},
|
240 |
+
"quiz": {
|
241 |
+
"bsonType": "object",
|
242 |
+
"description": "Quiz data",
|
243 |
+
"properties": {
|
244 |
+
"title": {
|
245 |
+
"bsonType": "string",
|
246 |
+
"description": "Title of the quiz",
|
247 |
+
},
|
248 |
+
"questions": {
|
249 |
+
"bsonType": "int",
|
250 |
+
"description": "Number of questions in the quiz",
|
251 |
+
},
|
252 |
+
"duration": {
|
253 |
+
"bsonType": "int",
|
254 |
+
"description": "Duration of the quiz in minutes",
|
255 |
+
},
|
256 |
+
},
|
257 |
+
},
|
258 |
+
"polls": {
|
259 |
+
"bsonType": "array",
|
260 |
+
"description": "List of polls conducted during the session",
|
261 |
+
"items": {
|
262 |
+
"bsonType": "object",
|
263 |
+
"required": ["question", "options"],
|
264 |
+
"properties": {
|
265 |
+
"question": {
|
266 |
+
"bsonType": "string",
|
267 |
+
"description": "Poll question",
|
268 |
+
},
|
269 |
+
"options": {
|
270 |
+
"bsonType": "array",
|
271 |
+
"description": "List of poll options",
|
272 |
+
"items": {"bsonType": "string"},
|
273 |
+
},
|
274 |
+
"responses": {
|
275 |
+
"bsonType": "object",
|
276 |
+
"description": "Responses to the poll",
|
277 |
+
"additionalProperties": {"bsonType": "int"},
|
278 |
+
},
|
279 |
+
},
|
280 |
+
},
|
281 |
+
},
|
282 |
+
},
|
283 |
+
},
|
284 |
+
"post_class": {
|
285 |
+
"bsonType": "object",
|
286 |
+
"description": "Post-class segment data",
|
287 |
+
"properties": {
|
288 |
+
"assignments": {
|
289 |
+
"bsonType": "array",
|
290 |
+
"description": "List of assignments",
|
291 |
+
"items": {
|
292 |
+
"bsonType": "object",
|
293 |
+
"required": ["id", "title", "due_date", "status"],
|
294 |
+
"properties": {
|
295 |
+
"id": {
|
296 |
+
"bsonType": "int",
|
297 |
+
"description": "Assignment ID",
|
298 |
+
},
|
299 |
+
"title": {
|
300 |
+
"bsonType": "string",
|
301 |
+
"description": "Title of the assignment",
|
302 |
+
},
|
303 |
+
"due_date": {
|
304 |
+
"bsonType": "date",
|
305 |
+
"description": "Due date of the assignment",
|
306 |
+
},
|
307 |
+
"status": {
|
308 |
+
"bsonType": "string",
|
309 |
+
"description": "Status of the assignment (e.g., pending, completed)",
|
310 |
+
},
|
311 |
+
"submissions": {
|
312 |
+
"bsonType": "array",
|
313 |
+
"description": "List of submissions",
|
314 |
+
"items": {
|
315 |
+
"bsonType": "object",
|
316 |
+
"required": [
|
317 |
+
"student_id",
|
318 |
+
"file_url",
|
319 |
+
"submitted_at",
|
320 |
+
],
|
321 |
+
"properties": {
|
322 |
+
"student_id": {
|
323 |
+
"bsonType": "string",
|
324 |
+
"description": "ID of the student who submitted the assignment",
|
325 |
+
},
|
326 |
+
"file_url": {
|
327 |
+
"bsonType": "string",
|
328 |
+
"description": "URL of the submitted file",
|
329 |
+
},
|
330 |
+
"submitted_at": {
|
331 |
+
"bsonType": "date",
|
332 |
+
"description": "Date when the assignment was submitted",
|
333 |
+
},
|
334 |
+
},
|
335 |
+
},
|
336 |
+
},
|
337 |
+
},
|
338 |
+
},
|
339 |
+
}
|
340 |
+
},
|
341 |
+
},
|
342 |
+
},
|
343 |
+
},
|
344 |
+
},
|
345 |
+
},
|
346 |
+
}
|
347 |
+
|
348 |
+
# Create the collection with the schema
|
349 |
+
# db.create_collection("courses_collection2", validator={"$jsonSchema": course_schema})
|
350 |
+
|
351 |
+
# sample_course = {
|
352 |
+
# "course_id": "CS101",
|
353 |
+
# "title": "Introduction to Computer Science",
|
354 |
+
# "description": "This course covers the basics of computer science and programming.",
|
355 |
+
# "faculty": "Dr. John Doe",
|
356 |
+
# "faculty_id": "F101",
|
357 |
+
# "duration": "10 weeks",
|
358 |
+
# "created_at": datetime.utcnow(),
|
359 |
+
# "sessions": [
|
360 |
+
# {
|
361 |
+
# "session_id": "S101",
|
362 |
+
# "title": "Introduction to Programming Fundamentals",
|
363 |
+
# "date": datetime.utcnow() - timedelta(days=7),
|
364 |
+
# "status": "completed",
|
365 |
+
# "created_at": datetime.utcnow() - timedelta(days=7),
|
366 |
+
# "pre_class": {
|
367 |
+
# "resources": [
|
368 |
+
# {
|
369 |
+
# "type": "pdf",
|
370 |
+
# "title": "Introduction to Python Basics",
|
371 |
+
# "url": "/assets/python_basics.pdf",
|
372 |
+
# "vector": [0.1, 0.2, 0.3] # Example vector
|
373 |
+
# }
|
374 |
+
# ],
|
375 |
+
# "completion_required": True
|
376 |
+
# },
|
377 |
+
# "in_class": {
|
378 |
+
# "topics": ["Variables", "Data Types", "Basic Operations"],
|
379 |
+
# "quiz": {
|
380 |
+
# "title": "Python Basics Quiz",
|
381 |
+
# "questions": 5,
|
382 |
+
# "duration": 15
|
383 |
+
# },
|
384 |
+
# "polls": [
|
385 |
+
# {
|
386 |
+
# "question": "How comfortable are you with Python syntax?",
|
387 |
+
# "options": ["Very", "Somewhat", "Not at all"],
|
388 |
+
# "responses": {"Very": 10, "Somewhat": 5, "Not at all": 2}
|
389 |
+
# }
|
390 |
+
# ]
|
391 |
+
# },
|
392 |
+
# "post_class": {
|
393 |
+
# "assignments": [
|
394 |
+
# {
|
395 |
+
# "id": 1,
|
396 |
+
# "title": "Basic Python Programs",
|
397 |
+
# "due_date": datetime.utcnow() + timedelta(days=2),
|
398 |
+
# "status": "pending",
|
399 |
+
# "submissions": []
|
400 |
+
# }
|
401 |
+
# ]
|
402 |
+
# }
|
403 |
+
# },
|
404 |
+
# {
|
405 |
+
# "session_id": "S102",
|
406 |
+
# "title": "Control Flow and Functions",
|
407 |
+
# "date": datetime.utcnow() - timedelta(days=3),
|
408 |
+
# "status": "completed",
|
409 |
+
# "created_at": datetime.utcnow() - timedelta(days=3),
|
410 |
+
# "pre_class": {
|
411 |
+
# "resources": [
|
412 |
+
# {
|
413 |
+
# "type": "pdf",
|
414 |
+
# "title": "Control Flow in Python",
|
415 |
+
# "url": "/assets/control_flow.pdf",
|
416 |
+
# "vector": [0.4, 0.5, 0.6] # Example vector
|
417 |
+
# }
|
418 |
+
# ],
|
419 |
+
# "completion_required": True
|
420 |
+
# },
|
421 |
+
# "in_class": {
|
422 |
+
# "topics": ["If-else statements", "Loops", "Function definitions"],
|
423 |
+
# "quiz": {
|
424 |
+
# "title": "Control Flow Quiz",
|
425 |
+
# "questions": 8,
|
426 |
+
# "duration": 20
|
427 |
+
# },
|
428 |
+
# "polls": [
|
429 |
+
# {
|
430 |
+
# "question": "Which loop type do you find more intuitive?",
|
431 |
+
# "options": ["For loops", "While loops", "Both"],
|
432 |
+
# "responses": {"For loops": 12, "While loops": 8, "Both": 10}
|
433 |
+
# }
|
434 |
+
# ]
|
435 |
+
# },
|
436 |
+
# "post_class": {
|
437 |
+
# "assignments": [
|
438 |
+
# {
|
439 |
+
# "id": 2,
|
440 |
+
# "title": "Function Implementation Exercise",
|
441 |
+
# "due_date": datetime.utcnow() + timedelta(days=4),
|
442 |
+
# "status": "pending",
|
443 |
+
# "submissions": []
|
444 |
+
# }
|
445 |
+
# ]
|
446 |
+
# }
|
447 |
+
# }
|
448 |
+
# ]
|
449 |
+
# }
|
450 |
+
courses_collection2 = db["courses_collection2"]
|
451 |
+
|
452 |
+
|
453 |
+
# Define the users schema
|
454 |
+
users_schema = {
|
455 |
+
"bsonType": "object",
|
456 |
+
"required": ["user_id", "username", "password", "role", "created_at"],
|
457 |
+
"properties": {
|
458 |
+
"user_id": {
|
459 |
+
"bsonType": "string",
|
460 |
+
"description": "Unique identifier for the user",
|
461 |
+
},
|
462 |
+
"username": {"bsonType": "string", "description": "Name of the User"},
|
463 |
+
"password": {"bsonType": "string", "description": "Password of the user"},
|
464 |
+
"role": {
|
465 |
+
"bsonType": "string",
|
466 |
+
"description": "Type of user (e.g., student, faculty)",
|
467 |
+
},
|
468 |
+
"created_at": {
|
469 |
+
"bsonType": "date",
|
470 |
+
"description": "Date when the user was created",
|
471 |
+
},
|
472 |
+
},
|
473 |
+
}
|
474 |
+
# Create the collection with the schema
|
475 |
+
# db.create_collection("users", validator={"$jsonSchema": users_schema})
|
476 |
+
users_collection = db["users"]
|
477 |
+
|
478 |
+
|
479 |
+
# Defining the Student Collection
|
480 |
+
student_schema = {
|
481 |
+
"bsonType": "object",
|
482 |
+
"required": ["SID", "full_name", "password", "enrolled_courses", "created_at"],
|
483 |
+
"properties": {
|
484 |
+
"SID": {
|
485 |
+
"bsonType": "string",
|
486 |
+
"description": "Unique identifier for the student",
|
487 |
+
},
|
488 |
+
"full_name": {"bsonType": "string", "description": "Full name of the student"},
|
489 |
+
"password": {
|
490 |
+
"bsonType": "string",
|
491 |
+
"description": "Hashed password of the student",
|
492 |
+
},
|
493 |
+
"enrolled_courses": {
|
494 |
+
"bsonType": "array",
|
495 |
+
"description": "List of courses the student is enrolled in",
|
496 |
+
"items": {
|
497 |
+
"bsonType": "object",
|
498 |
+
"required": ["course_id", "title"],
|
499 |
+
"properties": {
|
500 |
+
"course_id": {
|
501 |
+
"bsonType": "string",
|
502 |
+
"description": "Unique identifier for the course",
|
503 |
+
},
|
504 |
+
"title": {
|
505 |
+
"bsonType": "string",
|
506 |
+
"description": "Title of the course",
|
507 |
+
},
|
508 |
+
},
|
509 |
+
},
|
510 |
+
},
|
511 |
+
"created_at": {
|
512 |
+
"bsonType": "date",
|
513 |
+
"description": "Date when the student was created",
|
514 |
+
},
|
515 |
+
},
|
516 |
+
}
|
517 |
+
# Defining the Faculty Collection
|
518 |
+
faculty_schema = {
|
519 |
+
"bsonType": "object",
|
520 |
+
"required": ["TID", "full_name", "password", "courses_taught", "created_at"],
|
521 |
+
"properties": {
|
522 |
+
"TID": {
|
523 |
+
"bsonType": "string",
|
524 |
+
"description": "Unique identifier for the faculty",
|
525 |
+
},
|
526 |
+
"full_name": {"bsonType": "string", "description": "Full name of the faculty"},
|
527 |
+
"password": {
|
528 |
+
"bsonType": "string",
|
529 |
+
"description": "Hashed password of the faculty",
|
530 |
+
},
|
531 |
+
"courses_taught": {
|
532 |
+
"bsonType": "array",
|
533 |
+
"description": "List of courses the faculty is teaching",
|
534 |
+
"items": {
|
535 |
+
"bsonType": "object",
|
536 |
+
"required": ["course_id", "title"],
|
537 |
+
"properties": {
|
538 |
+
"course_id": {
|
539 |
+
"bsonType": "string",
|
540 |
+
"description": "Unique identifier for the course",
|
541 |
+
},
|
542 |
+
"title": {
|
543 |
+
"bsonType": "string",
|
544 |
+
"description": "Title of the course",
|
545 |
+
},
|
546 |
+
},
|
547 |
+
},
|
548 |
+
},
|
549 |
+
"created_at": {
|
550 |
+
"bsonType": "date",
|
551 |
+
"description": "Date when the faculty was created",
|
552 |
+
},
|
553 |
+
},
|
554 |
+
}
|
555 |
+
# Creating the Collections
|
556 |
+
# db.create_collection("students", validator={"$jsonSchema": student_schema})
|
557 |
+
# db.create_collection("faculty", validator={"$jsonSchema": faculty_schema})
|
558 |
+
|
559 |
+
students_collection = db["students"]
|
560 |
+
faculty_collection = db["faculty"]
|
561 |
+
|
562 |
+
# Defining the Vector Collection Schema
|
563 |
+
vector_schema = {
|
564 |
+
"bsonType": "object",
|
565 |
+
"required": ["resource_id", "vector"],
|
566 |
+
"properties": {
|
567 |
+
"resource_id": {
|
568 |
+
"bsonType": "objectId",
|
569 |
+
"description": "Unique identifier for the resource",
|
570 |
+
},
|
571 |
+
"vector": {
|
572 |
+
"bsonType": "array",
|
573 |
+
"description": "Vector representation of the resource",
|
574 |
+
"items": {"bsonType": "double"},
|
575 |
+
},
|
576 |
+
"text": {"bsonType": "string", "description": "Text content of the resource"},
|
577 |
+
"created_at": {
|
578 |
+
"bsonType": "date",
|
579 |
+
"description": "Date when the vector was created",
|
580 |
+
},
|
581 |
+
},
|
582 |
+
}
|
583 |
+
# Creating the Vector Collection
|
584 |
+
# db.create_collection("vectors", validator={"$jsonSchema": vector_schema})
|
585 |
+
vectors_collection = db["vectors"]
|
586 |
+
|
587 |
+
|
588 |
+
# Creating a Chat-History Collection
|
589 |
+
# Creating a Chat-History Collection
|
590 |
+
chat_history_schema = {
|
591 |
+
"bsonType": "object",
|
592 |
+
"required": ["user_id", "session_id", "messages", "timestamp"],
|
593 |
+
"properties": {
|
594 |
+
"user_id": {
|
595 |
+
"bsonType": "objectId",
|
596 |
+
"description": "Unique identifier for the user",
|
597 |
+
},
|
598 |
+
"session_id": {
|
599 |
+
"bsonType": "string",
|
600 |
+
"description": "Identifier for the session",
|
601 |
+
},
|
602 |
+
"timestamp": {
|
603 |
+
"bsonType": "date",
|
604 |
+
"description": "Timestamp when the chat session started",
|
605 |
+
},
|
606 |
+
"messages": {
|
607 |
+
"bsonType": "array",
|
608 |
+
"description": "List of chat messages",
|
609 |
+
"items": {
|
610 |
+
"bsonType": "object",
|
611 |
+
"properties": {
|
612 |
+
"prompt": {
|
613 |
+
"bsonType": "string",
|
614 |
+
"description": "User's question or prompt",
|
615 |
+
},
|
616 |
+
"response": {
|
617 |
+
"bsonType": "string",
|
618 |
+
"description": "Assistant's response",
|
619 |
+
},
|
620 |
+
"timestamp": {
|
621 |
+
"bsonType": "date",
|
622 |
+
"description": "Timestamp of the message",
|
623 |
+
},
|
624 |
+
},
|
625 |
+
},
|
626 |
+
},
|
627 |
+
},
|
628 |
+
}
|
629 |
+
|
630 |
+
# Create the collection with the schema
|
631 |
+
# db.create_collection("chat_history", validator={"$jsonSchema": chat_history_schema})
|
632 |
+
chat_history_collection = db["chat_history"]
|
633 |
+
|
634 |
+
|
635 |
+
# Database setup for Research Assistant
|
636 |
+
# Research Assistant Schema
|
637 |
+
research_assistant_schema = {
|
638 |
+
"bsonType": "object",
|
639 |
+
"required": ["full_name", "password", "email", "courses_assisted"],
|
640 |
+
"properties": {
|
641 |
+
"full_name": {
|
642 |
+
"bsonType": "string",
|
643 |
+
"description": "Full name of the research assistant",
|
644 |
+
},
|
645 |
+
"password": {
|
646 |
+
"bsonType": "string",
|
647 |
+
"description": "Hashed password of the research assistant",
|
648 |
+
},
|
649 |
+
"email": {
|
650 |
+
"bsonType": "string",
|
651 |
+
"description": "Email address of the research assistant",
|
652 |
+
},
|
653 |
+
"courses_assisted": {
|
654 |
+
"bsonType": "array",
|
655 |
+
"description": "List of courses the research assistant is assisting",
|
656 |
+
"items": {
|
657 |
+
"bsonType": "object",
|
658 |
+
"required": ["course_id"],
|
659 |
+
"properties": {
|
660 |
+
"course_id": {
|
661 |
+
"bsonType": "string",
|
662 |
+
"description": "ID of the course",
|
663 |
+
}
|
664 |
+
},
|
665 |
+
},
|
666 |
+
},
|
667 |
+
},
|
668 |
+
}
|
669 |
+
|
670 |
+
# Create research assistants collection
|
671 |
+
research_assistants_collection = db["research_assistants"]
|
672 |
+
|
673 |
+
# Create indexes
|
674 |
+
research_assistants_collection.create_index("full_name", unique=True)
|
675 |
+
research_assistants_collection.create_index("email", unique=True)
|
676 |
+
|
677 |
+
|
678 |
+
# Optional: Sample data insertion function
|
679 |
+
# def insert_sample_research_assistants():
|
680 |
+
# sample_research_assistants = [
|
681 |
+
# {
|
682 |
+
# "full_name": "John Doe RA",
|
683 |
+
# "password": generate_password_hash("password123"),
|
684 |
+
# "email": "[email protected]",
|
685 |
+
# "courses_assisted": [{"course_id": "CS101"}, {"course_id": "CS102"}],
|
686 |
+
# }
|
687 |
+
# ]
|
688 |
+
|
689 |
+
# try:
|
690 |
+
# research_assistants_collection.insert_many(sample_research_assistants)
|
691 |
+
# print("Sample research assistants inserted successfully!")
|
692 |
+
# except Exception as e:
|
693 |
+
# print(f"Error inserting sample research assistants: {e}")
|
694 |
+
|
695 |
+
# if __name__ == "__main__":
|
696 |
+
# insert_sample_analysts()
|
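A minimal sketch of how the commented-out create_collection calls above might be applied once the collections already exist: the same $jsonSchema validators can be attached with MongoDB's collMod command (shown here for users_schema; the other schemas follow the same pattern).

    # Hedged sketch: attach an existing schema as a validator via collMod.
    db.command({
        "collMod": "users",
        "validator": {"$jsonSchema": users_schema},
        "validationLevel": "moderate",  # only validate inserts and updates to already-valid documents
    })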
entire_download.py
ADDED
@@ -0,0 +1,90 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from pymongo import MongoClient
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
import os
|
6 |
+
|
7 |
+
# 1. Load environment variables
|
8 |
+
load_dotenv()
|
9 |
+
MONGODB_URI = os.getenv(
|
10 |
+
"MONGODB_UR",
|
11 |
+
"mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
|
12 |
+
)
|
13 |
+
|
14 |
+
# 2. Create MongoDB connection
|
15 |
+
client = MongoClient(MONGODB_URI)
|
16 |
+
db = client["novascholar_db"]
|
17 |
+
collection = db["research_papers"]
|
18 |
+
|
19 |
+
|
20 |
+
def get_collection_data(paper_type: str):
|
21 |
+
"""
|
22 |
+
Fetch all documents from the specified collection based on paper type.
|
23 |
+
"""
|
24 |
+
try:
|
25 |
+
# Determine collection name based on paper type
|
26 |
+
collection_name = paper_type.replace(" ", "_").lower()
|
27 |
+
doc_collection = db[collection_name]
|
28 |
+
|
29 |
+
# Get all documents
|
30 |
+
docs = list(doc_collection.find())
|
31 |
+
|
32 |
+
# Convert ObjectId to string
|
33 |
+
for doc in docs:
|
34 |
+
doc["_id"] = str(doc["_id"])
|
35 |
+
|
36 |
+
return docs
|
37 |
+
except Exception as e:
|
38 |
+
st.error(f"Database Error: {str(e)}")
|
39 |
+
return None
|
40 |
+
|
41 |
+
|
42 |
+
def main():
|
43 |
+
st.title("MongoDB Collection Download")
|
44 |
+
st.write("Download all documents from the selected research paper collection")
|
45 |
+
|
46 |
+
# Dropdown to select the type of research paper
|
47 |
+
paper_type = st.selectbox(
|
48 |
+
"Select type of research paper:",
|
49 |
+
[
|
50 |
+
"Review Based Paper",
|
51 |
+
"Opinion/Perspective Based Paper",
|
52 |
+
"Empirical Research Paper",
|
53 |
+
"Research Paper (Other)",
|
54 |
+
],
|
55 |
+
)
|
56 |
+
|
57 |
+
if st.button("Fetch Data"):
|
58 |
+
with st.spinner("Retrieving documents from MongoDB..."):
|
59 |
+
docs = get_collection_data(paper_type)
|
60 |
+
|
61 |
+
if docs:
|
62 |
+
# Convert to DataFrame
|
63 |
+
df = pd.DataFrame(docs)
|
64 |
+
# Convert lists to comma-separated strings for consistency
|
65 |
+
for col in df.columns:
|
66 |
+
if df[col].apply(lambda x: isinstance(x, list)).any():
|
67 |
+
df[col] = df[col].apply(
|
68 |
+
lambda x: (
|
69 |
+
", ".join(map(str, x)) if isinstance(x, list) else x
|
70 |
+
)
|
71 |
+
)
|
72 |
+
st.success(
|
73 |
+
f"Successfully retrieved {len(df)} documents from '{paper_type}' collection."
|
74 |
+
)
|
75 |
+
st.dataframe(df)
|
76 |
+
|
77 |
+
# Provide option to download the data as CSV
|
78 |
+
csv = df.to_csv(index=False).encode("utf-8")
|
79 |
+
st.download_button(
|
80 |
+
label="Download CSV",
|
81 |
+
data=csv,
|
82 |
+
file_name=f"{paper_type.replace(' ', '_').lower()}_papers.csv",
|
83 |
+
mime="text/csv",
|
84 |
+
)
|
85 |
+
else:
|
86 |
+
st.warning(f"No documents found in the '{paper_type}' collection.")
|
87 |
+
|
88 |
+
|
89 |
+
if __name__ == "__main__":
|
90 |
+
main()
|
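For reference, the collection lookup in get_collection_data simply normalises the dropdown label, so the MongoDB collections are expected to be named accordingly:

    >>> "Review Based Paper".replace(" ", "_").lower()
    'review_based_paper'
    >>> "Research Paper (Other)".replace(" ", "_").lower()
    'research_paper_(other)'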
extract.py
ADDED
@@ -0,0 +1,140 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import PyPDF2
|
4 |
+
import io
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import requests
|
8 |
+
import time
|
9 |
+
|
10 |
+
# Load environment variables
|
11 |
+
load_dotenv()
|
12 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
13 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
14 |
+
|
15 |
+
def call_perplexity_api(prompt: str) -> str:
|
16 |
+
"""Call Perplexity AI with a prompt, return the text response if successful."""
|
17 |
+
headers = {
|
18 |
+
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
19 |
+
"Content-Type": "application/json",
|
20 |
+
}
|
21 |
+
|
22 |
+
payload = {
|
23 |
+
"model": "llama-3.1-sonar-small-128k-chat",
|
24 |
+
"messages": [{"role": "user", "content": prompt}],
|
25 |
+
"temperature": 0.3,
|
26 |
+
}
|
27 |
+
|
28 |
+
try:
|
29 |
+
response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
|
30 |
+
response.raise_for_status()
|
31 |
+
return response.json()["choices"][0]["message"]["content"]
|
32 |
+
except Exception as e:
|
33 |
+
st.error(f"API Error: {str(e)}")
|
34 |
+
return ""
|
35 |
+
|
36 |
+
def extract_text_from_pdf(pdf_file):
|
37 |
+
"""Extract text content from a PDF file."""
|
38 |
+
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
39 |
+
text = ""
|
40 |
+
for page in pdf_reader.pages:
|
41 |
+
text += page.extract_text() + "\n"
|
42 |
+
return text
|
43 |
+
|
44 |
+
def analyze_paper(text: str, category: str) -> str:
|
45 |
+
"""Generate a prompt and get analysis for a specific category."""
|
46 |
+
prompts = {
|
47 |
+
"Summarized Abstract": "Extract and summarize the abstract from this research paper:",
|
48 |
+
"Results": "What are the main results and findings from this research paper:",
|
49 |
+
"Summarized Introduction": "Summarize the introduction section of this research paper:",
|
50 |
+
"Methods Used": "What are the main methods and methodologies used in this research:",
|
51 |
+
"Literature Survey": "Summarize the literature review or related work from this paper:",
|
52 |
+
"Limitations": "What are the limitations mentioned in this research:",
|
53 |
+
"Contributions": "What are the main contributions of this research:",
|
54 |
+
"Practical Implications": "What are the practical implications of this research:",
|
55 |
+
"Objectives": "What are the main objectives of this research:",
|
56 |
+
"Findings": "What are the key findings from this research:",
|
57 |
+
"Future Research": "What future research directions are suggested in this paper:",
|
58 |
+
"Dependent Variables": "What are the dependent variables studied in this research:",
|
59 |
+
"Independent Variables": "What are the independent variables studied in this research:",
|
60 |
+
"Dataset": "What dataset(s) were used in this research:",
|
61 |
+
"Problem Statement": "What is the main problem statement or research question:",
|
62 |
+
"Challenges": "What challenges were faced or addressed in this research:",
|
63 |
+
"Applications": "What are the potential applications of this research:"
|
64 |
+
}
|
65 |
+
|
66 |
+
prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}" # Limit text to avoid token limits
|
67 |
+
return call_perplexity_api(prompt)
|
68 |
+
|
69 |
+
def main():
|
70 |
+
st.title("Research Paper Analysis Tool")
|
71 |
+
|
72 |
+
# File uploader
|
73 |
+
uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)
|
74 |
+
|
75 |
+
if uploaded_files:
|
76 |
+
if st.button("Process Papers"):
|
77 |
+
# Initialize progress bar
|
78 |
+
progress_bar = st.progress(0)
|
79 |
+
status_text = st.empty()
|
80 |
+
|
81 |
+
# Initialize results dictionary
|
82 |
+
results = []
|
83 |
+
|
84 |
+
# Define categories
|
85 |
+
categories = [
|
86 |
+
"Summarized Abstract", "Results", "Summarized Introduction",
|
87 |
+
"Methods Used", "Literature Survey", "Limitations",
|
88 |
+
"Contributions", "Practical Implications", "Objectives",
|
89 |
+
"Findings", "Future Research", "Dependent Variables",
|
90 |
+
"Independent Variables", "Dataset", "Problem Statement",
|
91 |
+
"Challenges", "Applications"
|
92 |
+
]
|
93 |
+
|
94 |
+
# Process each file
|
95 |
+
for i, file in enumerate(uploaded_files):
|
96 |
+
status_text.text(f"Processing {file.name}...")
|
97 |
+
|
98 |
+
# Extract text from PDF
|
99 |
+
text = extract_text_from_pdf(file)
|
100 |
+
|
101 |
+
# Initialize paper results
|
102 |
+
paper_results = {"Filename": file.name}
|
103 |
+
|
104 |
+
# Analyze each category
|
105 |
+
for j, category in enumerate(categories):
|
106 |
+
status_text.text(f"Processing {file.name} - {category}")
|
107 |
+
paper_results[category] = analyze_paper(text, category)
|
108 |
+
|
109 |
+
# Update progress
|
110 |
+
progress = (i * len(categories) + j + 1) / (len(uploaded_files) * len(categories))
|
111 |
+
progress_bar.progress(progress)
|
112 |
+
|
113 |
+
# Add small delay to avoid API rate limits
|
114 |
+
time.sleep(1)
|
115 |
+
|
116 |
+
results.append(paper_results)
|
117 |
+
|
118 |
+
# Create DataFrame
|
119 |
+
df = pd.DataFrame(results)
|
120 |
+
|
121 |
+
# Convert DataFrame to CSV
|
122 |
+
csv = df.to_csv(index=False)
|
123 |
+
|
124 |
+
# Create download button
|
125 |
+
st.download_button(
|
126 |
+
label="Download Results as CSV",
|
127 |
+
data=csv,
|
128 |
+
file_name="research_papers_analysis.csv",
|
129 |
+
mime="text/csv"
|
130 |
+
)
|
131 |
+
|
132 |
+
# Display results in the app
|
133 |
+
st.subheader("Analysis Results")
|
134 |
+
st.dataframe(df)
|
135 |
+
|
136 |
+
status_text.text("Processing complete!")
|
137 |
+
progress_bar.progress(1.0)
|
138 |
+
|
139 |
+
if __name__ == "__main__":
|
140 |
+
main()
|
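A minimal usage sketch of the two helpers above outside the Streamlit UI, assuming PERPLEXITY_API_KEY is set in the environment; "paper.pdf" is a placeholder path.

    # Hedged sketch: analyse one local PDF for a single category.
    with open("paper.pdf", "rb") as f:  # placeholder file name
        text = extract_text_from_pdf(f)
    abstract = analyze_paper(text, "Summarized Abstract")
    print(abstract)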
file_upload_vectorize.py
ADDED
@@ -0,0 +1,179 @@
1 |
+
from pymongo import MongoClient
|
2 |
+
from datetime import datetime
|
3 |
+
import openai
|
4 |
+
import google.generativeai as genai
|
5 |
+
import streamlit as st
|
6 |
+
from db import courses_collection2, faculty_collection, students_collection, vectors_collection
|
7 |
+
from PIL import Image
|
8 |
+
import PyPDF2, docx, io
|
9 |
+
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
|
10 |
+
from bson import ObjectId
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
import os
|
13 |
+
from create_course import courses_collection
|
14 |
+
|
15 |
+
load_dotenv()
|
16 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
17 |
+
OPENAI_KEY = os.getenv('OPENAI_KEY')
|
18 |
+
GEMINI_KEY = os.getenv('GEMINI_KEY')
|
19 |
+
|
20 |
+
|
21 |
+
client = MongoClient(MONGO_URI)
|
22 |
+
db = client['novascholar_db']
|
23 |
+
resources_collection = db['resources']
|
24 |
+
|
25 |
+
# Configure APIs
|
26 |
+
openai.api_key = OPENAI_KEY
|
27 |
+
genai.configure(api_key=GEMINI_KEY)
|
28 |
+
model = genai.GenerativeModel('gemini-pro')
|
29 |
+
|
30 |
+
def upload_resource(course_id, session_id, file_name, file_content, material_type):
|
31 |
+
# material_data = {
|
32 |
+
# "session_id": session_id,
|
33 |
+
# "course_id": course_id,
|
34 |
+
# "file_name": file_name,
|
35 |
+
# "file_content": file_content,
|
36 |
+
# "material_type": material_type,
|
37 |
+
# "uploaded_at": datetime.utcnow()
|
38 |
+
# }
|
39 |
+
# return resources_collection.insert_one(material_data)
|
40 |
+
# resource_id = ObjectId()
|
41 |
+
|
42 |
+
# Extract text content from the file
|
43 |
+
text_content = extract_text_from_file(file_content)
|
44 |
+
|
45 |
+
# Check if a resource with this file name already exists
|
46 |
+
existing_resource = resources_collection.find_one({
|
47 |
+
"session_id": session_id,
|
48 |
+
"file_name": file_name
|
49 |
+
})
|
50 |
+
|
51 |
+
if existing_resource:
|
52 |
+
return existing_resource["_id"]
|
53 |
+
|
54 |
+
# Read the file content
|
55 |
+
file_content.seek(0) # Reset the file pointer to the beginning
|
56 |
+
original_file_content = file_content.read()
|
57 |
+
|
58 |
+
|
59 |
+
resource_data = {
|
60 |
+
"_id": ObjectId(),
|
61 |
+
"course_id": course_id,
|
62 |
+
"session_id": session_id,
|
63 |
+
"file_name": file_name,
|
64 |
+
"file_type": file_content.type,
|
65 |
+
"text_content": text_content,
|
66 |
+
"file_content": original_file_content, # Store the original file content
|
67 |
+
"material_type": material_type,
|
68 |
+
"uploaded_at": datetime.utcnow()
|
69 |
+
}
|
70 |
+
|
71 |
+
resources_collection.insert_one(resource_data)
|
72 |
+
resource_id = resource_data["_id"]
|
73 |
+
|
74 |
+
courses_collection.update_one(
|
75 |
+
{
|
76 |
+
"course_id": course_id,
|
77 |
+
"sessions.session_id": session_id
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"$push": {"sessions.$.pre_class.resources": resource_id}
|
81 |
+
}
|
82 |
+
)
|
83 |
+
# print("End of Upload Resource, Resource ID is: ", resource_id)
|
84 |
+
# return resource_id
|
85 |
+
if text_content:
|
86 |
+
create_vector_store(text_content, resource_id)
|
87 |
+
return resource_id
|
88 |
+
|
89 |
+
def assignment_submit(student_id, course_id, session_id, assignment_id, file_name, file_content, text_content, material_type):
|
90 |
+
# Read the file content
|
91 |
+
file_content.seek(0) # Reset the file pointer to the beginning
|
92 |
+
original_file_content = file_content.read()
|
93 |
+
|
94 |
+
assignment_data = {
|
95 |
+
"student_id": student_id,
|
96 |
+
"course_id": course_id,
|
97 |
+
"session_id": session_id,
|
98 |
+
"assignment_id": assignment_id,
|
99 |
+
"file_name": file_name,
|
100 |
+
"file_type": file_content.type,
|
101 |
+
"file_content": original_file_content, # Store the original file content
|
102 |
+
"text_content": text_content,
|
103 |
+
"material_type": material_type,
|
104 |
+
"submitted_at": datetime.utcnow(),
|
105 |
+
"file_url": "sample_url"
|
106 |
+
}
|
107 |
+
try:
|
108 |
+
courses_collection2.update_one(
|
109 |
+
{
|
110 |
+
"course_id": course_id,
|
111 |
+
"sessions.session_id": session_id,
|
112 |
+
"sessions.post_class.assignments.id": assignment_id
|
113 |
+
},
|
114 |
+
{
|
115 |
+
"$push": {"sessions.$.post_class.assignments.$[assignment].submissions": assignment_data}
|
116 |
+
},
|
117 |
+
array_filters=[{"assignment.id": assignment_id}]
|
118 |
+
)
|
119 |
+
return True
|
120 |
+
except Exception as db_error:
|
121 |
+
print(f"Error saving submission: {str(db_error)}")
|
122 |
+
return False
|
123 |
+
|
124 |
+
def extract_text_from_file(uploaded_file):
|
125 |
+
text = ""
|
126 |
+
file_type = uploaded_file.type
|
127 |
+
|
128 |
+
try:
|
129 |
+
if file_type == "text/plain":
|
130 |
+
text = uploaded_file.getvalue().decode("utf-8")
|
131 |
+
elif file_type == "application/pdf":
|
132 |
+
pdf_reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file.getvalue()))
|
133 |
+
for page in pdf_reader.pages:
|
134 |
+
text += page.extract_text() + "\n"
|
135 |
+
elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
|
136 |
+
doc = docx.Document(io.BytesIO(uploaded_file.getvalue()))
|
137 |
+
for para in doc.paragraphs:
|
138 |
+
text += para.text + "\n"
|
139 |
+
return text
|
140 |
+
except Exception as e:
|
141 |
+
st.error(f"Error processing file: {str(e)}")
|
142 |
+
return None
|
143 |
+
|
144 |
+
def get_embedding(text):
|
145 |
+
response = openai.embeddings.create(
|
146 |
+
model="text-embedding-ada-002",
|
147 |
+
input=text
|
148 |
+
)
|
149 |
+
return response.data[0].embedding
|
150 |
+
|
151 |
+
def create_vector_store(text, resource_id):
|
152 |
+
# resource_object_id = ObjectId(resource_id)
|
153 |
+
# Ensure resource_id is an ObjectId
|
154 |
+
# if not isinstance(resource_id, ObjectId):
|
155 |
+
# resource_id = ObjectId(resource_id)
|
156 |
+
|
157 |
+
existing_vector = vectors_collection.find_one({
|
158 |
+
"resource_id": resource_id,
|
159 |
+
"text": text
|
160 |
+
})
|
161 |
+
|
162 |
+
if existing_vector:
|
163 |
+
print(f"Vector already exists for Resource ID: {resource_id}")
|
164 |
+
return
|
165 |
+
|
166 |
+
print(f"In Vector Store method, Resource ID is: {resource_id}")
|
167 |
+
document = Document(text=text)
|
168 |
+
embedding = get_embedding(text)
|
169 |
+
|
170 |
+
vector_data = {
|
171 |
+
"resource_id": resource_id,
|
172 |
+
"vector": embedding,
|
173 |
+
"text": text,
|
174 |
+
"created_at": datetime.utcnow()
|
175 |
+
}
|
176 |
+
|
177 |
+
vectors_collection.insert_one(vector_data)
|
178 |
+
|
179 |
+
# return VectorStoreIndex.from_documents([document])
|
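A minimal sketch of wiring upload_resource to a Streamlit uploader; course_id, session_id and the "pre-class" material type are placeholders standing in for values supplied by the surrounding session page.

    # Hedged sketch: course_id / session_id come from the calling page in the real app.
    uploaded = st.file_uploader("Upload pre-class material", type=["txt", "pdf", "docx"])
    if uploaded is not None:
        resource_id = upload_resource(course_id, session_id, uploaded.name, uploaded, "pre-class")
        st.success(f"Stored resource {resource_id}")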
gen_mcqs.py
ADDED
@@ -0,0 +1,206 @@
1 |
+
import ast
|
2 |
+
from pymongo import MongoClient
|
3 |
+
from datetime import datetime
|
4 |
+
import openai
|
5 |
+
import google.generativeai as genai
|
6 |
+
from google.generativeai import GenerativeModel
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
import os
|
9 |
+
from file_upload_vectorize import resources_collection, vectors_collection, courses_collection2, faculty_collection
|
10 |
+
|
11 |
+
# Load environment variables
|
12 |
+
load_dotenv()
|
13 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
14 |
+
OPENAI_KEY = os.getenv('OPENAI_KEY')
|
15 |
+
GEMINI_KEY = os.getenv('GEMINI_KEY')
|
16 |
+
|
17 |
+
# Configure APIs
|
18 |
+
openai.api_key = OPENAI_KEY
|
19 |
+
genai.configure(api_key=GEMINI_KEY)
|
20 |
+
model = genai.GenerativeModel('gemini-pro')
|
21 |
+
|
22 |
+
# Connect to MongoDB
|
23 |
+
client = MongoClient(MONGO_URI)
|
24 |
+
db = client['novascholar_db']
|
25 |
+
quizzes_collection = db["quizzes"]
|
26 |
+
|
27 |
+
def strip_code_markers(response_text):
|
28 |
+
"""Strip off the markers ``` and python from a LLM model's response"""
|
29 |
+
if response_text.startswith("```python"):
|
30 |
+
response_text = response_text[len("```python"):].strip()
|
31 |
+
if response_text.startswith("```"):
|
32 |
+
response_text = response_text[len("```"):].strip()
|
33 |
+
if response_text.endswith("```"):
|
34 |
+
response_text = response_text[:-len("```")].strip()
|
35 |
+
return response_text
|
36 |
+
|
37 |
+
|
38 |
+
# New function to generate MCQs using Gemini
|
39 |
+
def generate_mcqs(context, num_questions, session_title, session_description):
|
40 |
+
"""Generate MCQs either from context or session details"""
|
41 |
+
try:
|
42 |
+
# Initialize Gemini model
|
43 |
+
if context:
|
44 |
+
prompt = f"""
|
45 |
+
Based on the following content, generate {num_questions} multiple choice questions.
|
46 |
+
Format each question as a Python dictionary with the following structure:
|
47 |
+
{{
|
48 |
+
"question": "Question text here",
|
49 |
+
"options": ["A) option1", "B) option2", "C) option3", "D) option4"],
|
50 |
+
"correct_option": "A) option1" or "B) option2" or "C) option3" or "D) option4"
|
51 |
+
}}
|
52 |
+
|
53 |
+
Content:
|
54 |
+
{context}
|
55 |
+
|
56 |
+
Generate challenging but clear questions that test understanding of key concepts.
|
57 |
+
Return only the Python list of dictionaries.
|
58 |
+
"""
|
59 |
+
else:
|
60 |
+
prompt = f"""
|
61 |
+
Generate {num_questions} multiple choice questions about the topic:
|
62 |
+
Title: {session_title}
|
63 |
+
Description: {session_description}
|
64 |
+
|
65 |
+
Format each question as a Python dictionary with the following structure:
|
66 |
+
{{
|
67 |
+
"question": "Question text here",
|
68 |
+
"options": ["A) option1", "B) option2", "C) option3", "D) option4"],
|
69 |
+
"correct_option": "A" or "B" or "C" or "D"
|
70 |
+
}}
|
71 |
+
|
72 |
+
Generate challenging but clear questions.
|
73 |
+
Return only the Python list of dictionaries without any additional formatting or markers
|
74 |
+
Do not write any other text, do not start the response with (```python), do not end the response with backticks(```)
|
75 |
+
A Sample response should look like this: Response Text: [
|
76 |
+
        {{
|
77 |
+
"question": "Which of the following is NOT a valid data type in C++?",
|
78 |
+
"options": ["int", "double", "boolean", "char"],
|
79 |
+
"correct_option": "C"
|
80 |
+
        }}
|
81 |
+
] (Notice that there are no backticks(```) around the response and no (```python))
|
82 |
+
.
|
83 |
+
"""
|
84 |
+
|
85 |
+
response = model.generate_content(prompt)
|
86 |
+
response_text = response.text.strip()
|
87 |
+
print("Response Text:", response_text)
|
88 |
+
modified_response_text = strip_code_markers(response_text)
|
89 |
+
print("Response Text Modified to:", modified_response_text)
|
90 |
+
# Extract and parse the response to get the list of MCQs
|
91 |
+
        mcqs = ast.literal_eval(modified_response_text)  # literal_eval safely parses the list of dicts without executing code
|
92 |
+
print(mcqs)
|
93 |
+
if not mcqs:
|
94 |
+
raise ValueError("No questions generated")
|
95 |
+
return mcqs
|
96 |
+
except Exception as e:
|
97 |
+
print(f"Error generating MCQs: , error: {e}")
|
98 |
+
return None
|
99 |
+
|
100 |
+
# New function to save quiz to database
|
101 |
+
def save_quiz(course_id, session_id, title, questions, user_id):
|
102 |
+
"""Save quiz to database"""
|
103 |
+
try:
|
104 |
+
quiz_data = {
|
105 |
+
"user_id": user_id,
|
106 |
+
"course_id": course_id,
|
107 |
+
"session_id": session_id,
|
108 |
+
"title": title,
|
109 |
+
"questions": questions,
|
110 |
+
"created_at": datetime.utcnow(),
|
111 |
+
"status": "active",
|
112 |
+
"submissions": []
|
113 |
+
}
|
114 |
+
result = quizzes_collection.insert_one(quiz_data)
|
115 |
+
return result.inserted_id
|
116 |
+
except Exception as e:
|
117 |
+
print(f"Error saving quiz: {e}")
|
118 |
+
return None
|
119 |
+
|
120 |
+
|
121 |
+
def get_student_quiz_score(quiz_id, student_id):
|
122 |
+
"""Get student's score for a specific quiz"""
|
123 |
+
quiz = quizzes_collection.find_one(
|
124 |
+
{
|
125 |
+
"_id": quiz_id,
|
126 |
+
"submissions.student_id": student_id
|
127 |
+
},
|
128 |
+
{"submissions.$": 1}
|
129 |
+
)
|
130 |
+
if quiz and quiz.get('submissions'):
|
131 |
+
return quiz['submissions'][0].get('score')
|
132 |
+
return None
|
133 |
+
|
134 |
+
# def submit_quiz_answers(quiz_id, student_id, student_answers):
|
135 |
+
# """Submit and score student's quiz answers"""
|
136 |
+
# quiz = quizzes_collection.find_one({"_id": quiz_id})
|
137 |
+
# if not quiz:
|
138 |
+
# return None
|
139 |
+
|
140 |
+
# # Calculate score
|
141 |
+
# correct_answers = 0
|
142 |
+
# total_questions = len(quiz['questions'])
|
143 |
+
|
144 |
+
# for q_idx, question in enumerate(quiz['questions']):
|
145 |
+
# if student_answers.get(str(q_idx)) == question['correct_option']:
|
146 |
+
# correct_answers += 1
|
147 |
+
|
148 |
+
# score = (correct_answers / total_questions) * 100
|
149 |
+
|
150 |
+
# # Store submission
|
151 |
+
# submission_data = {
|
152 |
+
# "student_id": student_id,
|
153 |
+
# "answers": student_answers,
|
154 |
+
# "score": score,
|
155 |
+
# "submitted_at": datetime.utcnow()
|
156 |
+
# }
|
157 |
+
|
158 |
+
# # Update quiz with submission
|
159 |
+
# quizzes_collection.update_one(
|
160 |
+
# {"_id": quiz_id},
|
161 |
+
# {
|
162 |
+
# "$push": {"submissions": submission_data}
|
163 |
+
# }
|
164 |
+
# )
|
165 |
+
|
166 |
+
# return score
|
167 |
+
def submit_quiz_answers(quiz_id, student_id, student_answers):
|
168 |
+
"""Submit and score student's quiz answers"""
|
169 |
+
try:
|
170 |
+
quiz = quizzes_collection.find_one({"_id": quiz_id})
|
171 |
+
if not quiz:
|
172 |
+
return None
|
173 |
+
|
174 |
+
# Calculate score
|
175 |
+
correct_answers = 0
|
176 |
+
total_questions = len(quiz['questions'])
|
177 |
+
|
178 |
+
for q_idx, question in enumerate(quiz['questions']):
|
179 |
+
student_answer = student_answers.get(str(q_idx))
|
180 |
+
if student_answer: # Only check if answer was provided
|
181 |
+
# Extract the option letter (A, B, C, D) from the full answer string
|
182 |
+
answer_letter = student_answer.split(')')[0].strip()
|
183 |
+
if answer_letter == question['correct_option']:
|
184 |
+
correct_answers += 1
|
185 |
+
|
186 |
+
score = (correct_answers / total_questions) * 100
|
187 |
+
|
188 |
+
# Store submission
|
189 |
+
submission_data = {
|
190 |
+
"student_id": student_id,
|
191 |
+
"answers": student_answers,
|
192 |
+
"score": score,
|
193 |
+
"submitted_at": datetime.utcnow()
|
194 |
+
}
|
195 |
+
|
196 |
+
# Update quiz with submission
|
197 |
+
result = quizzes_collection.update_one(
|
198 |
+
{"_id": quiz_id},
|
199 |
+
{"$push": {"submissions": submission_data}}
|
200 |
+
)
|
201 |
+
|
202 |
+
return score if result.modified_count > 0 else None
|
203 |
+
|
204 |
+
except Exception as e:
|
205 |
+
print(f"Error submitting quiz: {e}")
|
206 |
+
return None
|
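A minimal end-to-end sketch of the quiz helpers above; the course, session, user and student IDs are illustrative placeholders, and in the app they come from the logged-in user and the selected session.

    # Hedged sketch: generate questions without extra context, save the quiz, then grade one submission.
    questions = generate_mcqs(None, 5, "Control Flow and Functions", "Loops and function definitions in Python")
    if questions:
        quiz_id = save_quiz("CS101", "S102", "Control Flow Quiz", questions, user_id="F101")
        score = submit_quiz_answers(quiz_id, "SID001", {"0": "A) option1"})  # placeholder answer
        print(score)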
goals2.py
ADDED
@@ -0,0 +1,658 @@
1 |
+
import streamlit as st
|
2 |
+
from typing import List, Dict
|
3 |
+
import httpx
|
4 |
+
from pathlib import Path
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import json
|
8 |
+
import numpy as np
|
9 |
+
from pymongo import MongoClient
|
10 |
+
from openai import OpenAI
|
11 |
+
from datetime import datetime
|
12 |
+
import asyncio
|
13 |
+
import pandas as pd
|
14 |
+
|
15 |
+
# Load environment variables
|
16 |
+
load_dotenv()
|
17 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
|
18 |
+
MONGODB_URI = os.getenv("MONGO_URI")
|
19 |
+
OPENAI_API_KEY = os.getenv("OPENAI_KEY")
|
20 |
+
|
21 |
+
# Initialize MongoDB client
|
22 |
+
client = MongoClient(MONGODB_URI)
|
23 |
+
db = client["document_analysis"]
|
24 |
+
vectors_collection = db["document_vectors"]
|
25 |
+
|
26 |
+
# Initialize OpenAI client
|
27 |
+
openai_client = OpenAI(api_key=OPENAI_API_KEY)
|
28 |
+
|
29 |
+
|
30 |
+
class GoalAnalyzer:
|
31 |
+
def __init__(self):
|
32 |
+
self.api_key = PERPLEXITY_API_KEY
|
33 |
+
self.base_url = "https://api.perplexity.ai/chat/completions"
|
34 |
+
|
35 |
+
def clean_json_string(self, content: str) -> str:
|
36 |
+
"""Clean and extract valid JSON from string"""
|
37 |
+
# Remove markdown formatting
|
38 |
+
if "```json" in content:
|
39 |
+
content = content.split("```json")[1].split("```")[0]
|
40 |
+
elif "```" in content:
|
41 |
+
content = content.split("```")[1]
|
42 |
+
|
43 |
+
# Find the JSON object boundaries
|
44 |
+
start_idx = content.find("{")
|
45 |
+
end_idx = content.rfind("}") + 1
|
46 |
+
|
47 |
+
if start_idx != -1 and end_idx > 0:
|
48 |
+
content = content[start_idx:end_idx]
|
49 |
+
|
50 |
+
# Clean up common issues
|
51 |
+
content = content.strip()
|
52 |
+
content = content.replace("\n", "")
|
53 |
+
content = content.replace("'", '"')
|
54 |
+
|
55 |
+
return content
|
56 |
+
|
57 |
+
async def get_perplexity_analysis(self, text: str, goal: str) -> Dict:
|
58 |
+
"""Get analysis from Perplexity API"""
|
59 |
+
headers = {
|
60 |
+
"Authorization": f"Bearer {self.api_key}",
|
61 |
+
"Content-Type": "application/json",
|
62 |
+
}
|
63 |
+
|
64 |
+
prompt = f"""
|
65 |
+
Analyze the following text in context of the goal: {goal}
|
66 |
+
|
67 |
+
Text: {text}
|
68 |
+
|
69 |
+
Provide analysis in the following JSON format:
|
70 |
+
{{
|
71 |
+
"themes": ["theme1", "theme2"],
|
72 |
+
"subthemes": {{"theme1": ["subtheme1", "subtheme2"], "theme2": ["subtheme3"]}},
|
73 |
+
"keywords": ["keyword1", "keyword2"],
|
74 |
+
"relevance_score": 0-100
|
75 |
+
}}
|
76 |
+
"""
|
77 |
+
|
78 |
+
try:
|
79 |
+
async with httpx.AsyncClient() as client:
|
80 |
+
payload = {
|
81 |
+
"model": "llama-3.1-sonar-small-128k-chat", # Updated to supported model
|
82 |
+
"messages": [
|
83 |
+
{
|
84 |
+
"role": "system",
|
85 |
+
"content": "You are an AI assistant that analyzes documents and provides structured analysis.",
|
86 |
+
},
|
87 |
+
{"role": "user", "content": prompt},
|
88 |
+
],
|
89 |
+
"max_tokens": 1024,
|
90 |
+
}
|
91 |
+
|
92 |
+
# Debug info using expander
|
93 |
+
with st.expander("Debug Info", expanded=False):
|
94 |
+
st.write("Request payload:", payload)
|
95 |
+
|
96 |
+
response = await client.post(
|
97 |
+
self.base_url, headers=headers, json=payload, timeout=30.0
|
98 |
+
)
|
99 |
+
|
100 |
+
# Debug response info
|
101 |
+
with st.expander("Response Info", expanded=False):
|
102 |
+
st.write("Response status:", response.status_code)
|
103 |
+
st.write("Response headers:", dict(response.headers))
|
104 |
+
st.write("Response content:", response.text)
|
105 |
+
|
106 |
+
if response.status_code != 200:
|
107 |
+
error_detail = (
|
108 |
+
response.json() if response.content else "No error details"
|
109 |
+
)
|
110 |
+
raise Exception(
|
111 |
+
f"API returned status code {response.status_code}. Details: {error_detail}"
|
112 |
+
)
|
113 |
+
|
114 |
+
result = response.json()
|
115 |
+
content = (
|
116 |
+
result.get("choices", [{}])[0].get("message", {}).get("content", "")
|
117 |
+
)
|
118 |
+
|
119 |
+
# Clean and parse JSON
|
120 |
+
cleaned_content = self.clean_json_string(content)
|
121 |
+
|
122 |
+
try:
|
123 |
+
analysis = json.loads(cleaned_content)
|
124 |
+
|
125 |
+
# Validate required fields
|
126 |
+
required_fields = [
|
127 |
+
"themes",
|
128 |
+
"subthemes",
|
129 |
+
"keywords",
|
130 |
+
"relevance_score",
|
131 |
+
]
|
132 |
+
for field in required_fields:
|
133 |
+
if field not in analysis:
|
134 |
+
analysis[field] = [] if field != "relevance_score" else 0
|
135 |
+
|
136 |
+
return analysis
|
137 |
+
|
138 |
+
except json.JSONDecodeError as e:
|
139 |
+
st.error(f"JSON parsing error: {str(e)}")
|
140 |
+
st.error(f"Failed content: {cleaned_content}")
|
141 |
+
return {
|
142 |
+
"themes": ["Error parsing themes"],
|
143 |
+
"subthemes": {"Error": ["Failed to parse subthemes"]},
|
144 |
+
"keywords": ["parsing-error"],
|
145 |
+
"relevance_score": 0,
|
146 |
+
}
|
147 |
+
|
148 |
+
except Exception as e:
|
149 |
+
st.error(f"API Error: {str(e)}")
|
150 |
+
return None
|
151 |
+
|
152 |
+
def extract_text_from_file(self, file) -> str:
|
153 |
+
"""Extract text content from uploaded file"""
|
154 |
+
try:
|
155 |
+
text = ""
|
156 |
+
file_type = file.type
|
157 |
+
|
158 |
+
if file_type == "text/plain":
|
159 |
+
text = file.getvalue().decode("utf-8")
|
160 |
+
elif file_type == "application/pdf":
|
161 |
+
import PyPDF2
|
162 |
+
|
163 |
+
pdf_reader = PyPDF2.PdfReader(file)
|
164 |
+
for page in pdf_reader.pages:
|
165 |
+
text += page.extract_text()
|
166 |
+
elif (
|
167 |
+
file_type
|
168 |
+
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
169 |
+
):
|
170 |
+
import docx
|
171 |
+
|
172 |
+
doc = docx.Document(file)
|
173 |
+
text = " ".join([paragraph.text for paragraph in doc.paragraphs])
|
174 |
+
|
175 |
+
return text
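# Supported uploads: .txt (UTF-8), .pdf (via PyPDF2), .docx (via python-docx);
# any other file type falls through and returns an empty string.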
|
176 |
+
except Exception as e:
|
177 |
+
st.error(f"Error extracting text: {str(e)}")
|
178 |
+
return ""
|
179 |
+
|
180 |
+
|
181 |
+
class DocumentVectorizer:
|
182 |
+
def __init__(self):
|
183 |
+
self.model = "text-embedding-ada-002"
|
184 |
+
self.client = MongoClient(MONGODB_URI)
|
185 |
+
self.db = self.client["document_analysis"]
|
186 |
+
self.vectors_collection = self.db["document_vectors"]
|
187 |
+
|
# NOTE: create_index() cannot build a 1536-dimension vector index, and Atlas
# Vector Search indexes are managed outside this call, so only the document
# name is indexed here (used for upserts and lookups); cosine similarity is
# computed client-side in vector_search().
try:
self.vectors_collection.create_index("name", unique=True)
except Exception:
st.warning("Index on 'name' may already exist")
201 |
+
|
202 |
+
def get_embedding(self, text: str) -> list:
|
203 |
+
"""Get embedding vector for text using OpenAI"""
|
204 |
+
try:
|
205 |
+
response = openai_client.embeddings.create(model=self.model, input=text)
|
206 |
+
return response.data[0].embedding
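# text-embedding-ada-002 returns a 1536-dimensional vector; very long inputs may
# need truncating to stay within the embedding model's token limit.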
|
207 |
+
except Exception as e:
|
208 |
+
st.error(f"Error getting embedding: {str(e)}")
|
209 |
+
return None
|
210 |
+
|
211 |
+
# Add this method to DocumentVectorizer class
|
212 |
+
def vector_exists(self, doc_name: str) -> bool:
|
213 |
+
"""Check if vector exists for document"""
|
214 |
+
return self.vectors_collection.count_documents({"name": doc_name}) > 0
|
215 |
+
|
216 |
+
# Update store_vector method in DocumentVectorizer class
|
217 |
+
def store_vector(self, doc_name: str, vector: list, text: str, goal: str = None):
|
218 |
+
"""Store document/goal vector in MongoDB using upsert"""
|
219 |
+
try:
|
220 |
+
vector_doc = {
|
221 |
+
"name": doc_name,
|
222 |
+
"vector": vector,
|
223 |
+
"text": text,
|
224 |
+
"type": "document" if goal is None else "goal",
|
225 |
+
"goal": goal,
|
226 |
+
"updated_at": datetime.utcnow(),
|
227 |
+
}
|
228 |
+
|
229 |
+
# Use update_one with upsert
|
230 |
+
self.vectors_collection.update_one(
|
231 |
+
{"name": doc_name},
|
232 |
+
{"$set": vector_doc, "$setOnInsert": {"created_at": datetime.utcnow()}},
|
233 |
+
upsert=True,
|
234 |
+
)
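# upsert=True inserts the document the first time a name is seen and overwrites
# the $set fields on later runs; created_at is only written on the initial insert.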
|
235 |
+
|
236 |
+
except Exception as e:
|
237 |
+
st.error(f"Error storing vector: {str(e)}")
|
238 |
+
|
239 |
+
# Update vector_search method in DocumentVectorizer class
|
240 |
+
def vector_search(self, query_vector: List[float], limit: int = 5) -> List[Dict]:
|
241 |
+
"""Search for similar documents using vector similarity"""
|
242 |
+
try:
|
243 |
+
# Get all documents
|
244 |
+
documents = list(self.vectors_collection.find({"type": "document"}))
|
245 |
+
|
246 |
+
# Calculate similarities
|
247 |
+
similarities = []
|
248 |
+
for doc in documents:
|
249 |
+
similarity = self.calculate_similarity(query_vector, doc["vector"])
|
250 |
+
similarities.append(
|
251 |
+
{
|
252 |
+
"name": doc["name"],
|
253 |
+
"text": doc["text"],
|
254 |
+
"similarity": similarity, # Keep as float
|
255 |
+
"similarity_display": f"{similarity*100:.1f}%", # Add display version
|
256 |
+
}
|
257 |
+
)
|
258 |
+
|
259 |
+
# Sort by similarity and get top k
|
260 |
+
sorted_docs = sorted(
|
261 |
+
similarities,
|
262 |
+
key=lambda x: x["similarity"], # Sort by float value
|
263 |
+
reverse=True,
|
264 |
+
)[:limit]
|
265 |
+
|
266 |
+
return sorted_docs
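# This is a full client-side scan (one cosine similarity per stored document);
# fine for small collections, but a server-side vector index would be needed at scale.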
|
267 |
+
|
268 |
+
except Exception as e:
|
269 |
+
st.error(f"Vector search error: {str(e)}")
|
270 |
+
return []
|
271 |
+
|
272 |
+
def find_similar_documents(self, text: str, limit: int = 5) -> List[Dict]:
|
273 |
+
"""Find similar documents for given text"""
|
274 |
+
vector = self.get_embedding(text)
|
275 |
+
if vector:
|
276 |
+
return self.vector_search(vector, limit)
|
277 |
+
return []
|
278 |
+
|
279 |
+
def calculate_similarity(self, vector1: list, vector2: list) -> float:
|
280 |
+
"""Calculate cosine similarity between two vectors"""
|
281 |
+
return np.dot(vector1, vector2) / (
|
282 |
+
np.linalg.norm(vector1) * np.linalg.norm(vector2)
|
283 |
+
)
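# Cosine similarity ranges from -1 to 1: identical directions give 1.0, orthogonal
# vectors give 0.0. A zero-norm vector (e.g. empty text) would cause a divide-by-zero here.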
|
284 |
+
|
285 |
+
|
286 |
+
def display_analysis_results(analysis: Dict):
|
287 |
+
"""Display analysis results in Streamlit UI"""
|
288 |
+
if not analysis:
|
289 |
+
return
|
290 |
+
|
291 |
+
# Display Themes
|
292 |
+
st.subheader("Themes")
|
293 |
+
for theme in analysis.get("themes", []):
|
294 |
+
with st.expander(f"🎯 {theme}"):
|
295 |
+
# Display subthemes for this theme
|
296 |
+
subthemes = analysis.get("subthemes", {}).get(theme, [])
|
297 |
+
if subthemes:
|
298 |
+
st.write("**Subthemes:**")
|
299 |
+
for subtheme in subthemes:
|
300 |
+
st.write(f"- {subtheme}")
|
301 |
+
|
302 |
+
# Display Keywords
|
303 |
+
st.subheader("Keywords")
|
304 |
+
keywords = analysis.get("keywords", [])
|
305 |
+
st.write(" | ".join([f"🔑 {keyword}" for keyword in keywords]))
|
306 |
+
|
307 |
+
# Display Relevance Score
|
308 |
+
score = analysis.get("relevance_score", 0)
|
309 |
+
st.metric("Relevance Score", f"{score}%")
|
310 |
+
|
311 |
+
|
312 |
+
def display_analyst_dashboard():
|
313 |
+
st.title("Multi-Goal Document Analysis")
|
314 |
+
|
315 |
+
with st.sidebar:
|
316 |
+
st.markdown("### Input Section")
|
317 |
+
tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
|
318 |
+
# tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
|
319 |
+
|
320 |
+
with tab1:
|
321 |
+
# Multiple goals input
|
322 |
+
num_goals = st.number_input("Number of goals:", min_value=1, value=1)
|
323 |
+
goals = []
|
324 |
+
for i in range(num_goals):
|
325 |
+
goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
|
326 |
+
if goal:
|
327 |
+
goals.append(goal)
|
328 |
+
|
329 |
+
uploaded_files = st.file_uploader(
|
330 |
+
"Upload documents",
|
331 |
+
accept_multiple_files=True,
|
332 |
+
type=["txt", "pdf", "docx"],
|
333 |
+
)
|
334 |
+
analyze_button = (
|
335 |
+
st.button("Analyze Documents") if goals and uploaded_files else None
|
336 |
+
)
|
337 |
+
|
338 |
+
with tab2:
|
339 |
+
# Keep existing similarity search tab
|
340 |
+
search_text = st.text_area("Enter text to find similar documents:")
|
341 |
+
search_limit = st.slider("Number of results", 1, 10, 5)
|
342 |
+
search_button = st.button("Search Similar") if search_text else None
|
343 |
+
|
344 |
+
if st.button("Logout", use_container_width=True):
|
345 |
+
for key in st.session_state.keys():
|
346 |
+
del st.session_state[key]
|
347 |
+
st.rerun()
|
348 |
+
|
349 |
+
if analyze_button:
|
350 |
+
analyzer = GoalAnalyzer()
|
351 |
+
vectorizer = DocumentVectorizer()
|
352 |
+
|
353 |
+
# Store vectors
|
354 |
+
doc_vectors = {}
|
355 |
+
goal_vectors = {}
|
356 |
+
|
357 |
+
# Process goals first
|
358 |
+
with st.spinner("Processing goals..."):
|
359 |
+
for i, goal in enumerate(goals):
|
360 |
+
vector = vectorizer.get_embedding(goal)
|
361 |
+
if vector:
|
362 |
+
goal_vectors[f"Goal {i+1}"] = vector
|
363 |
+
vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)
|
364 |
+
|
365 |
+
# Process documents
|
366 |
+
with st.spinner("Processing documents..."):
|
367 |
+
for file in uploaded_files:
|
368 |
+
st.markdown(f"### Analysis for {file.name}")
|
369 |
+
|
370 |
+
if vectorizer.vector_exists(file.name):
|
371 |
+
st.info(f"Vector already exists for {file.name}")
|
372 |
+
existing_doc = vectorizer.vectors_collection.find_one(
|
373 |
+
{"name": file.name}
|
374 |
+
)
|
375 |
+
doc_vectors[file.name] = existing_doc["vector"]
# reuse the stored text so the combined-goal analysis below has content for this file
text = existing_doc["text"]
|
376 |
+
else:
|
377 |
+
text = analyzer.extract_text_from_file(file)
|
378 |
+
if not text:
|
379 |
+
st.warning(f"Could not extract text from {file.name}")
|
380 |
+
continue
|
381 |
+
|
382 |
+
vector = vectorizer.get_embedding(text)
|
383 |
+
if vector:
|
384 |
+
doc_vectors[file.name] = vector
|
385 |
+
vectorizer.store_vector(file.name, vector, text)
|
386 |
+
|
387 |
+
# Display goal similarities
|
388 |
+
st.subheader("Goal Relevance Scores")
|
389 |
+
col1, col2 = st.columns([1, 2])
|
390 |
+
|
391 |
+
with col1:
|
392 |
+
for goal_name, goal_vector in goal_vectors.items():
|
393 |
+
similarity = (
|
394 |
+
vectorizer.calculate_similarity(
|
395 |
+
doc_vectors[file.name], goal_vector
|
396 |
+
)
|
397 |
+
* 100
|
398 |
+
)
|
399 |
+
st.metric(f"{goal_name}", f"{similarity:.1f}%")
|
400 |
+
|
401 |
+
with col2:
|
402 |
+
# Get analysis for all goals combined
|
403 |
+
analysis = asyncio.run(
|
404 |
+
analyzer.get_perplexity_analysis(text, " | ".join(goals))
|
405 |
+
)
|
406 |
+
display_analysis_results(analysis)
|
407 |
+
|
408 |
+
st.divider()
|
409 |
+
|
410 |
+
# Document similarity matrix
|
411 |
+
if len(doc_vectors) > 1:
|
412 |
+
st.markdown("### Document Similarity Matrix")
|
413 |
+
files = list(doc_vectors.keys())
|
414 |
+
similarity_matrix = []
|
415 |
+
|
416 |
+
for file1 in files:
|
417 |
+
row = []
|
418 |
+
for file2 in files:
|
419 |
+
similarity = vectorizer.calculate_similarity(
|
420 |
+
doc_vectors[file1], doc_vectors[file2]
|
421 |
+
)
|
422 |
+
row.append(similarity)
|
423 |
+
similarity_matrix.append(row)
|
424 |
+
|
425 |
+
df = pd.DataFrame(similarity_matrix, columns=files, index=files)
|
426 |
+
st.dataframe(df.style.background_gradient(cmap="RdYlGn"))
|
427 |
+
|
428 |
+
# Add goal-document similarity matrix
|
429 |
+
st.markdown("### Goal-Document Similarity Matrix")
|
430 |
+
goal_doc_matrix = []
|
431 |
+
goal_names = list(goal_vectors.keys())
|
432 |
+
|
433 |
+
for file in files:
|
434 |
+
row = []
|
435 |
+
for goal in goal_names:
|
436 |
+
similarity = vectorizer.calculate_similarity(
|
437 |
+
doc_vectors[file], goal_vectors[goal]
|
438 |
+
)
|
439 |
+
row.append(similarity)
|
440 |
+
goal_doc_matrix.append(row)
|
441 |
+
|
442 |
+
df_goals = pd.DataFrame(
|
443 |
+
goal_doc_matrix, columns=goal_names, index=files
|
444 |
+
)
|
445 |
+
st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))
|
446 |
+
|
447 |
+
# Keep existing similarity search functionality
|
448 |
+
elif search_button:
|
449 |
+
vectorizer = DocumentVectorizer()
|
450 |
+
with st.spinner("Searching similar documents..."):
|
451 |
+
query_vector = vectorizer.get_embedding(search_text)
|
452 |
+
if query_vector:
|
453 |
+
similar_docs = vectorizer.vector_search(query_vector, search_limit)
|
454 |
+
|
455 |
+
if similar_docs:
|
456 |
+
st.markdown("### Similar Documents Found")
|
457 |
+
|
458 |
+
# Create DataFrame with numeric similarities
|
459 |
+
df = pd.DataFrame(similar_docs)
|
460 |
+
|
461 |
+
# Apply gradient to numeric column
|
462 |
+
styled_df = df[["name", "similarity"]].style.background_gradient(
|
463 |
+
cmap="RdYlGn", subset=["similarity"]
|
464 |
+
)
|
465 |
+
|
466 |
+
# Format display after styling
|
467 |
+
styled_df = styled_df.format({"similarity": "{:.1%}"})
|
468 |
+
|
469 |
+
st.dataframe(styled_df)
|
470 |
+
|
471 |
+
# Show document contents
|
472 |
+
for doc in similar_docs:
|
473 |
+
with st.expander(
|
474 |
+
f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
|
475 |
+
):
|
476 |
+
st.text(
|
477 |
+
doc["text"][:20] + "..."
|
478 |
+
if len(doc["text"]) > 20
|
479 |
+
else doc["text"]
|
480 |
+
)
|
481 |
+
else:
|
482 |
+
st.info("No similar documents found")
|
483 |
+
else:
|
484 |
+
st.error("Could not process search query")
|
485 |
+
|
486 |
+
|
487 |
+
def main():
|
488 |
+
st.title("Multi-Goal Document Analysis")
|
489 |
+
|
490 |
+
with st.sidebar:
|
491 |
+
st.markdown("### Input Section")
|
492 |
+
tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
|
493 |
+
# tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
|
494 |
+
|
495 |
+
with tab1:
|
496 |
+
# Multiple goals input
|
497 |
+
num_goals = st.number_input("Number of goals:", min_value=1, value=1)
|
498 |
+
goals = []
|
499 |
+
for i in range(num_goals):
|
500 |
+
goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
|
501 |
+
if goal:
|
502 |
+
goals.append(goal)
|
503 |
+
|
504 |
+
uploaded_files = st.file_uploader(
|
505 |
+
"Upload documents",
|
506 |
+
accept_multiple_files=True,
|
507 |
+
type=["txt", "pdf", "docx"],
|
508 |
+
)
|
509 |
+
analyze_button = (
|
510 |
+
st.button("Analyze Documents") if goals and uploaded_files else None
|
511 |
+
)
|
512 |
+
|
513 |
+
with tab2:
|
514 |
+
# Keep existing similarity search tab
|
515 |
+
search_text = st.text_area("Enter text to find similar documents:")
|
516 |
+
search_limit = st.slider("Number of results", 1, 10, 5)
|
517 |
+
search_button = st.button("Search Similar") if search_text else None
|
518 |
+
|
519 |
+
if analyze_button:
|
520 |
+
analyzer = GoalAnalyzer()
|
521 |
+
vectorizer = DocumentVectorizer()
|
522 |
+
|
523 |
+
# Store vectors
|
524 |
+
doc_vectors = {}
|
525 |
+
goal_vectors = {}
|
526 |
+
|
527 |
+
# Process goals first
|
528 |
+
with st.spinner("Processing goals..."):
|
529 |
+
for i, goal in enumerate(goals):
|
530 |
+
vector = vectorizer.get_embedding(goal)
|
531 |
+
if vector:
|
532 |
+
goal_vectors[f"Goal {i+1}"] = vector
|
533 |
+
vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)
|
534 |
+
|
535 |
+
# Process documents
|
536 |
+
with st.spinner("Processing documents..."):
|
537 |
+
for file in uploaded_files:
|
538 |
+
st.markdown(f"### Analysis for {file.name}")
|
539 |
+
|
540 |
+
if vectorizer.vector_exists(file.name):
|
541 |
+
st.info(f"Vector already exists for {file.name}")
|
542 |
+
existing_doc = vectorizer.vectors_collection.find_one(
|
543 |
+
{"name": file.name}
|
544 |
+
)
|
545 |
+
doc_vectors[file.name] = existing_doc["vector"]
# reuse the stored text so the combined-goal analysis below has content for this file
text = existing_doc["text"]
|
546 |
+
else:
|
547 |
+
text = analyzer.extract_text_from_file(file)
|
548 |
+
if not text:
|
549 |
+
st.warning(f"Could not extract text from {file.name}")
|
550 |
+
continue
|
551 |
+
|
552 |
+
vector = vectorizer.get_embedding(text)
|
553 |
+
if vector:
|
554 |
+
doc_vectors[file.name] = vector
|
555 |
+
vectorizer.store_vector(file.name, vector, text)
|
556 |
+
|
557 |
+
# Display goal similarities
|
558 |
+
st.subheader("Goal Relevance Scores")
|
559 |
+
col1, col2 = st.columns([1, 2])
|
560 |
+
|
561 |
+
with col1:
|
562 |
+
for goal_name, goal_vector in goal_vectors.items():
|
563 |
+
similarity = (
|
564 |
+
vectorizer.calculate_similarity(
|
565 |
+
doc_vectors[file.name], goal_vector
|
566 |
+
)
|
567 |
+
* 100
|
568 |
+
)
|
569 |
+
st.metric(f"{goal_name}", f"{similarity:.1f}%")
|
570 |
+
|
571 |
+
with col2:
|
572 |
+
# Get analysis for all goals combined
|
573 |
+
analysis = asyncio.run(
|
574 |
+
analyzer.get_perplexity_analysis(text, " | ".join(goals))
|
575 |
+
)
|
576 |
+
display_analysis_results(analysis)
|
577 |
+
|
578 |
+
st.divider()
|
579 |
+
|
580 |
+
# Document similarity matrix
|
581 |
+
if len(doc_vectors) > 1:
|
582 |
+
st.markdown("### Document Similarity Matrix")
|
583 |
+
files = list(doc_vectors.keys())
|
584 |
+
similarity_matrix = []
|
585 |
+
|
586 |
+
for file1 in files:
|
587 |
+
row = []
|
588 |
+
for file2 in files:
|
589 |
+
similarity = vectorizer.calculate_similarity(
|
590 |
+
doc_vectors[file1], doc_vectors[file2]
|
591 |
+
)
|
592 |
+
row.append(similarity)
|
593 |
+
similarity_matrix.append(row)
|
594 |
+
|
595 |
+
df = pd.DataFrame(similarity_matrix, columns=files, index=files)
|
596 |
+
st.dataframe(df.style.background_gradient(cmap="RdYlGn"))
|
597 |
+
|
598 |
+
# Add goal-document similarity matrix
|
599 |
+
st.markdown("### Goal-Document Similarity Matrix")
|
600 |
+
goal_doc_matrix = []
|
601 |
+
goal_names = list(goal_vectors.keys())
|
602 |
+
|
603 |
+
for file in files:
|
604 |
+
row = []
|
605 |
+
for goal in goal_names:
|
606 |
+
similarity = vectorizer.calculate_similarity(
|
607 |
+
doc_vectors[file], goal_vectors[goal]
|
608 |
+
)
|
609 |
+
row.append(similarity)
|
610 |
+
goal_doc_matrix.append(row)
|
611 |
+
|
612 |
+
df_goals = pd.DataFrame(
|
613 |
+
goal_doc_matrix, columns=goal_names, index=files
|
614 |
+
)
|
615 |
+
st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))
|
616 |
+
|
617 |
+
# Keep existing similarity search functionality
|
618 |
+
elif search_button:
|
619 |
+
vectorizer = DocumentVectorizer()
|
620 |
+
with st.spinner("Searching similar documents..."):
|
621 |
+
query_vector = vectorizer.get_embedding(search_text)
|
622 |
+
if query_vector:
|
623 |
+
similar_docs = vectorizer.vector_search(query_vector, search_limit)
|
624 |
+
|
625 |
+
if similar_docs:
|
626 |
+
st.markdown("### Similar Documents Found")
|
627 |
+
|
628 |
+
# Create DataFrame with numeric similarities
|
629 |
+
df = pd.DataFrame(similar_docs)
|
630 |
+
|
631 |
+
# Apply gradient to numeric column
|
632 |
+
styled_df = df[["name", "similarity"]].style.background_gradient(
|
633 |
+
cmap="RdYlGn", subset=["similarity"]
|
634 |
+
)
|
635 |
+
|
636 |
+
# Format display after styling
|
637 |
+
styled_df = styled_df.format({"similarity": "{:.1%}"})
|
638 |
+
|
639 |
+
st.dataframe(styled_df)
|
640 |
+
|
641 |
+
# Show document contents
|
642 |
+
for doc in similar_docs:
|
643 |
+
with st.expander(
|
644 |
+
f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
|
645 |
+
):
|
646 |
+
st.text(
|
647 |
+
doc["text"][:20] + "..."
|
648 |
+
if len(doc["text"]) > 20
|
649 |
+
else doc["text"]
|
650 |
+
)
|
651 |
+
else:
|
652 |
+
st.info("No similar documents found")
|
653 |
+
else:
|
654 |
+
st.error("Could not process search query")
|
655 |
+
|
656 |
+
|
657 |
+
if __name__ == "__main__":
|
658 |
+
main()
|
infranew.py
ADDED
@@ -0,0 +1,231 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import networkx as nx
|
4 |
+
from bokeh.models import HoverTool
|
5 |
+
from bokeh.plotting import figure, from_networkx
|
6 |
+
import requests
|
7 |
+
import json
|
8 |
+
import google.generativeai as genai
|
9 |
+
|
10 |
+
import os  # needed to read the key from the environment
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")  # load the key from the environment rather than hardcoding it in source
|
11 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
12 |
+
|
13 |
+
|
14 |
+
def extract_edges(keywords):
|
15 |
+
keywords = [kw.strip() for kw in keywords.split(",")]
|
16 |
+
edges = [
|
17 |
+
(keywords[i], keywords[j])
|
18 |
+
for i in range(len(keywords))
|
19 |
+
for j in range(i + 1, len(keywords))
|
20 |
+
]
|
21 |
+
return edges
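# e.g. extract_edges("graphs, embeddings, search") ->
# [("graphs", "embeddings"), ("graphs", "search"), ("embeddings", "search")]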
|
22 |
+
|
23 |
+
|
24 |
+
def create_knowledge_graph(data):
|
25 |
+
G = nx.Graph()
|
26 |
+
|
27 |
+
for _, row in data.iterrows():
|
28 |
+
words = []
|
29 |
+
for col in data.columns:
|
30 |
+
if pd.notnull(row[col]):
|
31 |
+
# Convert to string and handle numeric values
|
32 |
+
cell_value = str(row[col]).strip()
|
33 |
+
if cell_value:
|
34 |
+
words.extend(cell_value.split())
|
35 |
+
|
36 |
+
if words:
|
37 |
+
edges = extract_edges(",".join(words))
|
38 |
+
G.add_edges_from(edges)
|
39 |
+
|
40 |
+
for word in words:
|
41 |
+
word = word.strip()
|
42 |
+
if word not in G:
|
43 |
+
G.add_node(word, title=word, value=len(word))
|
44 |
+
|
45 |
+
return G
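# Every word in a row becomes a node and every pair of words in that row an edge,
# so edge count grows quadratically with row length; large CSVs may need sampling before plotting.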
|
46 |
+
|
47 |
+
|
48 |
+
def render_graph_bokeh(G):
|
49 |
+
plot = figure(
|
50 |
+
title="Interactive Knowledge Graph",
|
51 |
+
x_range=(-1.5, 1.5),
|
52 |
+
y_range=(-1.5, 1.5),
|
53 |
+
tools="pan,wheel_zoom,box_zoom,reset,tap",
|
54 |
+
active_scroll="wheel_zoom",
|
55 |
+
)
|
56 |
+
plot.add_tools(HoverTool(tooltips="@index"))
|
57 |
+
|
58 |
+
graph_renderer = from_networkx(G, nx.spring_layout, scale=1, center=(0, 0))
|
59 |
+
|
60 |
+
graph_renderer.node_renderer.glyph.size = 10
|
61 |
+
graph_renderer.node_renderer.glyph.fill_color = "blue"
|
62 |
+
graph_renderer.node_renderer.glyph.line_color = "black"
|
63 |
+
|
64 |
+
graph_renderer.edge_renderer.glyph.line_width = 1
|
65 |
+
graph_renderer.edge_renderer.glyph.line_color = "gray"
|
66 |
+
|
67 |
+
plot.renderers.append(graph_renderer)
|
68 |
+
|
69 |
+
return plot
|
70 |
+
|
71 |
+
|
72 |
+
import re
|
73 |
+
|
74 |
+
|
75 |
+
def search_papers(topic: str, num_papers: int) -> list:
|
76 |
+
headers = {
|
77 |
+
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
78 |
+
"Content-Type": "application/json",
|
79 |
+
}
|
80 |
+
|
81 |
+
prompt = f"""Find {num_papers} recent research papers about {topic}.
|
82 |
+
Return ONLY a valid JSON array with the following structure for each paper:
|
83 |
+
[
|
84 |
+
{{
|
85 |
+
"Title": "paper title",
|
86 |
+
"Abstract": "abstract text",
|
87 |
+
"Keywords": "key terms"
|
88 |
+
}}
|
89 |
+
]"""
|
90 |
+
|
91 |
+
payload = {
|
92 |
+
"model": "llama-3.1-sonar-small-128k-chat",
|
93 |
+
"messages": [
|
94 |
+
{
|
95 |
+
"role": "system",
|
96 |
+
"content": "You are a research paper analyzer that returns valid JSON arrays.",
|
97 |
+
},
|
98 |
+
{"role": "user", "content": prompt},
|
99 |
+
],
|
100 |
+
"temperature": 0.1,
|
101 |
+
}
|
102 |
+
|
103 |
+
try:
|
104 |
+
response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
|
105 |
+
response.raise_for_status()
|
106 |
+
content = response.json()["choices"][0]["message"]["content"]
|
107 |
+
|
108 |
+
# Clean response to ensure valid JSON
|
109 |
+
content = content.strip()
|
110 |
+
if not content.startswith("["):
|
111 |
+
content = content[content.find("[") :]
|
112 |
+
if not content.endswith("]"):
|
113 |
+
content = content[: content.rfind("]") + 1]
|
114 |
+
|
115 |
+
# Remove any trailing commas before closing brackets
|
116 |
+
content = re.sub(r",\s*]", "]", content)
|
117 |
+
content = re.sub(r",\s*}", "}", content)
|
118 |
+
|
119 |
+
papers = json.loads(content)
|
120 |
+
if not isinstance(papers, list):
|
121 |
+
raise ValueError("Response is not a JSON array")
|
122 |
+
return papers
|
123 |
+
except requests.exceptions.RequestException as e:
|
124 |
+
st.error(f"API Request Error: {str(e)}")
|
125 |
+
return []
|
126 |
+
except json.JSONDecodeError as e:
|
127 |
+
st.error(f"Invalid JSON response: {str(e)}")
|
128 |
+
st.error(f"Response content: {response.text}")
|
129 |
+
return []
|
130 |
+
except ValueError as e:
|
131 |
+
st.error(f"Error: {str(e)}")
|
132 |
+
return []
|
133 |
+
|
134 |
+
|
135 |
+
import os
|
136 |
+
|
137 |
+
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
138 |
+
# NOTE: no REST endpoint is needed here; call_gemini_api() below uses the google.generativeai SDK directly.
|
139 |
+
|
140 |
+
|
141 |
+
def call_gemini_api(prompt: str) -> str:
|
142 |
+
headers = {
|
143 |
+
"Authorization": f"Bearer {GEMINI_API_KEY}",
|
144 |
+
"Content-Type": "application/json",
|
145 |
+
}
|
146 |
+
|
147 |
+
payload = {
|
148 |
+
"prompt": prompt,
|
149 |
+
"max_tokens": 150,
|
150 |
+
"temperature": 0.7,
|
151 |
+
}
|
152 |
+
|
153 |
+
try:
|
154 |
+
genai.configure(api_key=GEMINI_API_KEY)  # the SDK must be configured with the key before use
model = genai.GenerativeModel("gemini-pro")
|
155 |
+
response = model.generate_content(prompt)
|
156 |
+
return response.text
|
157 |
+
except Exception as e:
|
158 |
+
st.error(f"Gemini API Error: {str(e)}")
|
159 |
+
return ""
|
160 |
+
|
161 |
+
|
162 |
+
def generate_gaps_paragraph(gaps):
|
163 |
+
prompt = f"Generate a brief paragraph about the gaps in the research based on the following gaps: {', '.join(gaps)}"
|
164 |
+
return call_gemini_api(prompt)
|
165 |
+
|
166 |
+
|
167 |
+
def generate_insights(G, topic):
|
168 |
+
papers = search_papers(topic, 5)
|
169 |
+
if papers:
|
170 |
+
st.write("### Research Insights from Perplexity API")
|
171 |
+
for paper in papers:
|
172 |
+
st.write(f"**Title:** {paper['Title']}")
|
173 |
+
st.write(f"**Abstract:** {paper['Abstract']}")
|
174 |
+
st.write(f"**Keywords:** {paper['Keywords']}")
|
175 |
+
st.write("---")
|
176 |
+
|
177 |
+
nodes = list(G.nodes(data=True))
|
178 |
+
insights = {}
|
179 |
+
insights["Strong Points"] = [
|
180 |
+
n for n, d in nodes if G.degree(n) > len(G.nodes) * 0.1
|
181 |
+
]
|
182 |
+
insights["Weak Points"] = [n for n, d in nodes if G.degree(n) < len(G.nodes) * 0.05]
|
183 |
+
insights["Gaps"] = [n for n, d in nodes if len(list(nx.neighbors(G, n))) == 0]
|
184 |
+
|
185 |
+
st.write("### Graph-Based Insights")
|
186 |
+
st.write("**Strong Points:**", insights["Strong Points"])
|
187 |
+
st.write("**Weak Points:**", insights["Weak Points"])
|
188 |
+
st.write("**Gaps:**", insights["Gaps"])
|
189 |
+
|
190 |
+
if insights["Gaps"]:
|
191 |
+
with st.spinner("Generating insights about gaps..."):
|
192 |
+
gaps_paragraph = generate_gaps_paragraph(insights["Gaps"])
|
193 |
+
if gaps_paragraph:
|
194 |
+
st.write("### Gaps in Research")
|
195 |
+
st.write(gaps_paragraph)
|
196 |
+
|
197 |
+
|
198 |
+
def main():
|
199 |
+
st.title("Advanced Interactive Knowledge Graph")
|
200 |
+
st.write(
|
201 |
+
"Upload a CSV file to generate a fully interactive and insightful knowledge graph."
|
202 |
+
)
|
203 |
+
|
204 |
+
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
|
205 |
+
|
206 |
+
if uploaded_file is not None:
|
207 |
+
try:
|
208 |
+
data = pd.read_csv(uploaded_file)
|
209 |
+
st.write("Preview of the uploaded data:")
|
210 |
+
st.dataframe(data.head())
|
211 |
+
|
212 |
+
G = create_knowledge_graph(data)
|
213 |
+
|
214 |
+
st.write("Generated Knowledge Graph:")
|
215 |
+
plot = render_graph_bokeh(G)
|
216 |
+
st.bokeh_chart(plot, use_container_width=True)
|
217 |
+
|
218 |
+
topic = st.text_input(
|
219 |
+
"Enter a topic for additional insights:", "knowledge graphs"
|
220 |
+
)
|
221 |
+
if topic:
|
222 |
+
generate_insights(G, topic)
|
223 |
+
|
224 |
+
except Exception as e:
|
225 |
+
st.error(f"An error occurred while processing the file: {e}")
|
226 |
+
else:
|
227 |
+
st.info("Please upload a CSV file to get started.")
|
228 |
+
|
229 |
+
|
230 |
+
if __name__ == "__main__":
|
231 |
+
main()
|
keywords_database_download.py
ADDED
@@ -0,0 +1,104 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from pymongo import MongoClient
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
import os
|
6 |
+
import json
|
7 |
+
import re
|
8 |
+
|
9 |
+
# 1. Load environment variables
|
10 |
+
load_dotenv()
|
11 |
+
MONGODB_URI = os.getenv(
|
12 |
+
"MONGODB_UR",
|
13 |
+
"mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
|
14 |
+
)
|
15 |
+
# 2. Create MongoDB connection
|
16 |
+
client = MongoClient(MONGODB_URI)
|
17 |
+
db = client["novascholar_db"]
|
18 |
+
collection = db["research_papers"]
|
19 |
+
|
20 |
+
|
21 |
+
def convert_mixed_columns(df: pd.DataFrame) -> pd.DataFrame:
|
22 |
+
"""
|
23 |
+
Convert any columns that contain lists into comma-separated strings
|
24 |
+
in order to ensure consistent data types for CSV export.
|
25 |
+
"""
|
26 |
+
for col in df.columns:
|
27 |
+
if any(isinstance(val, list) for val in df[col].dropna()):
|
28 |
+
df[col] = df[col].apply(
|
29 |
+
lambda x: (
|
30 |
+
", ".join(map(str, x))
|
31 |
+
if isinstance(x, list)
|
32 |
+
else (str(x) if pd.notna(x) else "")
|
33 |
+
)
|
34 |
+
)
|
35 |
+
return df
|
36 |
+
|
37 |
+
|
38 |
+
def filter_and_export_collection_to_csv(keyword: str, doc_collection=None):
|
39 |
+
"""
|
40 |
+
Find documents in the given collection with a matching keyword
|
41 |
+
in the 'Keywords' field, export them to CSV, and return the DataFrame
|
42 |
+
and CSV filename.
|
43 |
+
"""
|
44 |
+
# Use the default 'research_papers' collection if none provided
|
45 |
+
if doc_collection is None:
|
46 |
+
doc_collection = collection
|
47 |
+
|
48 |
+
docs = list(doc_collection.find({"Keywords": {"$regex": keyword, "$options": "i"}}))
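# "$options": "i" makes the regex case-insensitive, and the keyword is treated as a
# substring pattern, so "learning" also matches documents keyed with "Machine Learning".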
|
49 |
+
if docs:
|
50 |
+
df = pd.DataFrame(docs)
|
51 |
+
df = convert_mixed_columns(df)
|
52 |
+
csv_filename = "papers_filtered_export.csv"
|
53 |
+
df.to_csv(csv_filename, index=False)
|
54 |
+
return df, csv_filename
|
55 |
+
else:
|
56 |
+
# Return an empty DataFrame if no documents found
|
57 |
+
return pd.DataFrame(), None
|
58 |
+
|
59 |
+
|
60 |
+
def main():
|
61 |
+
# st.set_page_config(page_title="Filter and Export Papers", layout="wide")
|
62 |
+
st.title("Filter and Export Papers by Keyword")
|
63 |
+
|
64 |
+
# Let user select the paper type
|
65 |
+
paper_type = st.selectbox(
|
66 |
+
"Select type of research paper:",
|
67 |
+
[
|
68 |
+
"Review Based Paper",
|
69 |
+
"Opinion/Perspective Based Paper",
|
70 |
+
"Empirical Research Paper",
|
71 |
+
"Research Paper (Other)",
|
72 |
+
],
|
73 |
+
)
|
74 |
+
|
75 |
+
# 5. Let user enter the keyword to filter
|
76 |
+
keyword_input = st.text_input(
|
77 |
+
"Enter the exact keyword to filter papers by 'Keywords' field:"
|
78 |
+
)
|
79 |
+
|
80 |
+
# When user clicks button, use the collection for the selected paper type
|
81 |
+
if st.button("Export Filtered Papers to CSV"):
|
82 |
+
with st.spinner("Exporting filtered documents..."):
|
83 |
+
try:
|
84 |
+
# Determine dynamic collection based on paper type
|
85 |
+
collection_name = paper_type.replace(" ", "_").lower()
|
86 |
+
doc_collection = db[collection_name]
|
87 |
+
|
88 |
+
df, csv_filename = filter_and_export_collection_to_csv(
|
89 |
+
keyword_input, doc_collection
|
90 |
+
)
|
91 |
+
if not df.empty and csv_filename:
|
92 |
+
st.success(
|
93 |
+
f"Successfully exported filtered papers to {csv_filename}!"
|
94 |
+
)
|
95 |
+
st.write("Preview of the filtered DataFrame:")
|
96 |
+
st.dataframe(df)
|
97 |
+
else:
|
98 |
+
st.warning("No matching documents found for that keyword.")
|
99 |
+
except Exception as e:
|
100 |
+
st.error(f"Error exporting filtered papers: {str(e)}")
|
101 |
+
|
102 |
+
|
103 |
+
if __name__ == "__main__":
|
104 |
+
main()
|
live_polls.py
ADDED
@@ -0,0 +1,115 @@
1 |
+
# live_poll_feature.py
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
import pandas as pd
|
5 |
+
from datetime import datetime
|
6 |
+
from poll_db_operations import PollDatabase
|
7 |
+
|
8 |
+
class LivePollFeature:
|
9 |
+
def __init__(self):
|
10 |
+
self.db = PollDatabase()
|
11 |
+
|
12 |
+
def display_faculty_interface(self, session_id):
|
13 |
+
"""Display the faculty interface for managing polls"""
|
14 |
+
st.subheader("Live Polls Management")
|
15 |
+
|
16 |
+
# Create new poll
|
17 |
+
with st.expander("Create New Poll", expanded=False):
|
18 |
+
question = st.text_input("Poll Question")
|
19 |
+
|
20 |
+
num_options = st.number_input("Number of Options",
|
21 |
+
min_value=2,
|
22 |
+
max_value=6,
|
23 |
+
value=4)
|
24 |
+
|
25 |
+
options = []
|
26 |
+
for i in range(num_options):
|
27 |
+
option = st.text_input(f"Option {i+1}",
|
28 |
+
key=f"option_{i}")
|
29 |
+
if option:
|
30 |
+
options.append(option)
|
31 |
+
|
32 |
+
if st.button("Create Poll") and question and len(options) >= 2:
|
33 |
+
self.db.create_poll(
|
34 |
+
st.session_state.selected_course,
|
35 |
+
session_id,
|
36 |
+
question,
|
37 |
+
options,
|
38 |
+
st.session_state.user_id
|
39 |
+
)
|
40 |
+
st.success("Poll created successfully!")
|
41 |
+
st.rerun()
|
42 |
+
|
43 |
+
# Display active polls
|
44 |
+
active_polls = self.db.get_active_polls(session_id)
|
45 |
+
if active_polls:
|
46 |
+
st.subheader("Active Polls")
|
47 |
+
for poll in active_polls:
|
48 |
+
with st.expander(f"Poll: {poll['question']}", expanded=True):
|
49 |
+
# Display results
|
50 |
+
self._display_poll_results(poll)
|
51 |
+
|
52 |
+
if st.button("Close Poll",
|
53 |
+
key=f"close_{str(poll['_id'])}"):
|
54 |
+
self.db.close_poll(poll['_id'])
|
55 |
+
st.success("Poll closed successfully!")
|
56 |
+
st.rerun()
|
57 |
+
|
58 |
+
def display_student_interface(self, session_id):
|
59 |
+
"""Display the student interface for participating in polls"""
|
60 |
+
st.subheader("Live Polls")
|
61 |
+
|
62 |
+
active_polls = self.db.get_active_polls(session_id)
|
63 |
+
if not active_polls:
|
64 |
+
st.info("No active polls at the moment.")
|
65 |
+
return
|
66 |
+
|
67 |
+
for poll in active_polls:
|
68 |
+
with st.expander(f"Poll: {poll['question']}", expanded=True):
|
69 |
+
selected_option = st.radio(
|
70 |
+
"Your response:",
|
71 |
+
options=poll['options'],
|
72 |
+
key=f"poll_{str(poll['_id'])}"
|
73 |
+
)
|
74 |
+
|
75 |
+
if st.button("Submit Response",
|
76 |
+
key=f"submit_{str(poll['_id'])}"):
|
77 |
+
success, message = self.db.submit_response(
|
78 |
+
poll['_id'],
|
79 |
+
st.session_state.user_id,
|
80 |
+
selected_option
|
81 |
+
)
|
82 |
+
if success:
|
83 |
+
st.success(message)
|
84 |
+
else:
|
85 |
+
st.warning(message)
|
86 |
+
st.rerun()
|
87 |
+
|
88 |
+
# self._display_poll_results(poll)
|
89 |
+
|
90 |
+
def _display_poll_results(self, poll):
|
91 |
+
"""Helper method to display poll results"""
|
92 |
+
responses_df = pd.DataFrame(
|
93 |
+
list(poll['responses'].items()),
|
94 |
+
columns=['Option', 'Votes']
|
95 |
+
)
|
96 |
+
|
97 |
+
total_votes = responses_df['Votes'].sum()
|
98 |
+
|
99 |
+
# Calculate percentages
|
100 |
+
if total_votes > 0:
|
101 |
+
responses_df['Percentage'] = (
|
102 |
+
responses_df['Votes'] / total_votes * 100
|
103 |
+
).round(1)
|
104 |
+
else:
|
105 |
+
responses_df['Percentage'] = 0
|
106 |
+
|
107 |
+
# Display metrics
|
108 |
+
st.metric("Total Responses", total_votes)
|
109 |
+
|
110 |
+
# Display charts
|
111 |
+
st.bar_chart(responses_df.set_index('Option')['Votes'])
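# The chart plots raw vote counts; the Percentage column is surfaced to faculty in the dataframe below.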
|
112 |
+
|
113 |
+
# Display detailed statistics
|
114 |
+
if st.session_state.user_type == 'faculty':
|
115 |
+
st.dataframe(responses_df)
|
loldude.py
ADDED
@@ -0,0 +1,135 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
6 |
+
import plotly.express as px
|
7 |
+
import plotly.graph_objects as go
|
8 |
+
from collections import defaultdict
|
9 |
+
|
10 |
+
def load_and_preprocess_data(uploaded_file):
|
11 |
+
"""Load and preprocess the CSV data."""
|
12 |
+
df = pd.read_csv(uploaded_file)
|
13 |
+
# Combine relevant text fields for similarity comparison
|
14 |
+
df['combined_text'] = df['Title'] + ' ' + df['Abstract'] + ' ' + df['Keywords']
|
15 |
+
return df
|
16 |
+
|
17 |
+
def calculate_similarity_matrix(df):
|
18 |
+
"""Calculate cosine similarity matrix based on combined text."""
|
19 |
+
tfidf = TfidfVectorizer(stop_words='english')
|
20 |
+
tfidf_matrix = tfidf.fit_transform(df['combined_text'])
|
21 |
+
similarity_matrix = cosine_similarity(tfidf_matrix)
|
22 |
+
return similarity_matrix
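# similarity_matrix[i][j] is the TF-IDF cosine similarity between papers i and j
# (1.0 on the diagonal, values near 0 for unrelated papers).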
|
23 |
+
|
24 |
+
def find_similar_papers(similarity_matrix, df, threshold=0.7):
|
25 |
+
"""Find pairs of papers with similarity above threshold."""
|
26 |
+
similar_pairs = []
|
27 |
+
for i in range(len(similarity_matrix)):
|
28 |
+
for j in range(i + 1, len(similarity_matrix)):
|
29 |
+
similarity = similarity_matrix[i][j]
|
30 |
+
if similarity >= threshold:
|
31 |
+
similar_pairs.append({
|
32 |
+
'Paper 1': df.iloc[i]['Title'],
|
33 |
+
'Paper 2': df.iloc[j]['Title'],
|
34 |
+
'Similarity': similarity
|
35 |
+
})
|
36 |
+
return pd.DataFrame(similar_pairs)
|
37 |
+
|
38 |
+
def find_outliers(similarity_matrix, df, threshold=0.3):
|
39 |
+
"""Find papers with low average similarity to others."""
|
40 |
+
avg_similarities = np.mean(similarity_matrix, axis=1)
|
41 |
+
outliers = []
|
42 |
+
for i, avg_sim in enumerate(avg_similarities):
|
43 |
+
if avg_sim < threshold:
|
44 |
+
outliers.append({
|
45 |
+
'Title': df.iloc[i]['Title'],
|
46 |
+
'Average Similarity': avg_sim
|
47 |
+
})
|
48 |
+
return pd.DataFrame(outliers)
|
49 |
+
|
50 |
+
def create_similarity_heatmap(similarity_matrix, df):
|
51 |
+
"""Create a heatmap of similarity matrix."""
|
52 |
+
fig = go.Figure(data=go.Heatmap(
|
53 |
+
z=similarity_matrix,
|
54 |
+
x=df['Title'],
|
55 |
+
y=df['Title'],
|
56 |
+
colorscale='Viridis'
|
57 |
+
))
|
58 |
+
fig.update_layout(
|
59 |
+
title='Paper Similarity Heatmap',
|
60 |
+
xaxis_tickangle=-45,
|
61 |
+
height=800
|
62 |
+
)
|
63 |
+
return fig
|
64 |
+
|
65 |
+
def analyze_keywords(df):
|
66 |
+
"""Analyze keyword frequency across papers."""
|
67 |
+
keyword_freq = defaultdict(int)
|
68 |
+
for keywords in df['Keywords']:
|
69 |
+
if isinstance(keywords, str):
|
70 |
+
for keyword in keywords.split(','):
|
71 |
+
keyword = keyword.strip()
|
72 |
+
keyword_freq[keyword] += 1
|
73 |
+
|
74 |
+
keyword_df = pd.DataFrame([
|
75 |
+
{'Keyword': k, 'Frequency': v}
|
76 |
+
for k, v in keyword_freq.items()
|
77 |
+
]).sort_values('Frequency', ascending=False)
|
78 |
+
|
79 |
+
return keyword_df
|
80 |
+
|
81 |
+
def main():
|
82 |
+
st.title('Research Papers Similarity Analysis')
|
83 |
+
|
84 |
+
uploaded_file = st.file_uploader("Upload your research papers CSV file", type=['csv'])
|
85 |
+
|
86 |
+
if uploaded_file is not None:
|
87 |
+
df = load_and_preprocess_data(uploaded_file)
|
88 |
+
similarity_matrix = calculate_similarity_matrix(df)
|
89 |
+
|
90 |
+
st.header('Document Similarity Analysis')
|
91 |
+
|
92 |
+
# Similarity Heatmap
|
93 |
+
st.subheader('Similarity Heatmap')
|
94 |
+
heatmap = create_similarity_heatmap(similarity_matrix, df)
|
95 |
+
st.plotly_chart(heatmap, use_container_width=True)
|
96 |
+
|
97 |
+
# Similar Papers
|
98 |
+
st.subheader('Similar Papers')
|
99 |
+
similarity_threshold = st.slider('Similarity Threshold', 0.0, 1.0, 0.7)
|
100 |
+
similar_papers = find_similar_papers(similarity_matrix, df, similarity_threshold)
|
101 |
+
if not similar_papers.empty:
|
102 |
+
st.dataframe(similar_papers)
|
103 |
+
else:
|
104 |
+
st.write("No papers found above the similarity threshold.")
|
105 |
+
|
106 |
+
# Outliers
|
107 |
+
st.subheader('Outlier Papers')
|
108 |
+
outlier_threshold = st.slider('Outlier Threshold', 0.0, 1.0, 0.3)
|
109 |
+
outliers = find_outliers(similarity_matrix, df, outlier_threshold)
|
110 |
+
if not outliers.empty:
|
111 |
+
st.dataframe(outliers)
|
112 |
+
else:
|
113 |
+
st.write("No outliers found below the threshold.")
|
114 |
+
|
115 |
+
# Keyword Analysis
|
116 |
+
st.header('Keyword Analysis')
|
117 |
+
keyword_freq = analyze_keywords(df)
|
118 |
+
if not keyword_freq.empty:
|
119 |
+
fig = px.bar(keyword_freq, x='Keyword', y='Frequency',
|
120 |
+
title='Keyword Frequency Across Papers')
|
121 |
+
fig.update_xaxes(tickangle=45)
|
122 |
+
st.plotly_chart(fig, use_container_width=True)
|
123 |
+
|
124 |
+
# Basic Statistics
|
125 |
+
st.header('Basic Statistics')
|
126 |
+
col1, col2 = st.columns(2)
|
127 |
+
with col1:
|
128 |
+
st.metric("Total Papers", len(df))
|
129 |
+
st.metric("Average Similarity", f"{np.mean(similarity_matrix):.2f}")
|
130 |
+
with col2:
|
131 |
+
st.metric("Unique Keywords", len(keyword_freq))
|
132 |
+
st.metric("Max Similarity", f"{np.max(similarity_matrix[~np.eye(similarity_matrix.shape[0], dtype=bool)]):.2f}")
|
133 |
+
|
134 |
+
if __name__ == "__main__":
|
135 |
+
main()
|
modify_schema.py
ADDED
@@ -0,0 +1,222 @@
1 |
+
from db import courses_collection2
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
import os
|
4 |
+
from pymongo import MongoClient
|
5 |
+
from datetime import datetime
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
load_dotenv()
|
10 |
+
MONGO_URI = os.getenv("MONGO_URI")
|
11 |
+
|
12 |
+
client = MongoClient(MONGO_URI)
|
13 |
+
db = client["novascholar_db"]
|
14 |
+
|
15 |
+
# Define the updated course schema
|
16 |
+
updated_course_schema = {
|
17 |
+
"bsonType": "object",
|
18 |
+
"required": [
|
19 |
+
"course_id",
|
20 |
+
"title",
|
21 |
+
"description",
|
22 |
+
"faculty",
|
23 |
+
"faculty_id",
|
24 |
+
"duration",
|
25 |
+
"created_at",
|
26 |
+
],
|
27 |
+
"properties": {
|
28 |
+
"course_id": {
|
29 |
+
"bsonType": "string",
|
30 |
+
"description": "Unique identifier for the course",
|
31 |
+
},
|
32 |
+
"title": {"bsonType": "string", "description": "Title of the course"},
|
33 |
+
"description": {
|
34 |
+
"bsonType": "string",
|
35 |
+
"description": "Description of the course",
|
36 |
+
},
|
37 |
+
"faculty": {"bsonType": "string", "description": "Name of the faculty"},
|
38 |
+
"duration": {"bsonType": "string", "description": "Duration of the course"},
|
39 |
+
"created_at": {
|
40 |
+
"bsonType": "date",
|
41 |
+
"description": "Date when the course was created",
|
42 |
+
},
|
43 |
+
"sessions": {
|
44 |
+
"bsonType": "array",
|
45 |
+
"description": "List of sessions associated with the course",
|
46 |
+
"items": {
|
47 |
+
"bsonType": "object",
|
48 |
+
"required": ["session_id", "title", "date"],
|
49 |
+
"properties": {
|
50 |
+
"session_id": {
|
51 |
+
"bsonType": "string",
|
52 |
+
"description": "Unique identifier for the session",
|
53 |
+
},
|
54 |
+
"title": {
|
55 |
+
"bsonType": "string",
|
56 |
+
"description": "Title of the session",
|
57 |
+
},
|
58 |
+
"date": {"bsonType": "date", "description": "Date of the session"},
|
59 |
+
"status": {
|
60 |
+
"bsonType": "string",
|
61 |
+
"description": "Status of the session (e.g., completed, upcoming)",
|
62 |
+
},
|
63 |
+
"created_at": {
|
64 |
+
"bsonType": "date",
|
65 |
+
"description": "Date when the session was created",
|
66 |
+
},
|
67 |
+
"pre_class": {
|
68 |
+
"bsonType": "object",
|
69 |
+
"description": "Pre-class segment data",
|
70 |
+
"properties": {
|
71 |
+
"resources": {
|
72 |
+
"bsonType": "array",
|
73 |
+
"description": "List of pre-class resources",
|
74 |
+
"items": {
|
75 |
+
"bsonType": "object",
|
76 |
+
"required": ["type", "title", "url"],
|
77 |
+
"properties": {
|
78 |
+
"type": {
|
79 |
+
"bsonType": "string",
|
80 |
+
"description": "Type of resource (e.g., pdf, video)",
|
81 |
+
},
|
82 |
+
"title": {
|
83 |
+
"bsonType": "string",
|
84 |
+
"description": "Title of the resource",
|
85 |
+
},
|
86 |
+
"url": {
|
87 |
+
"bsonType": "string",
|
88 |
+
"description": "URL of the resource",
|
89 |
+
},
|
90 |
+
"vector": {
|
91 |
+
"bsonType": "array",
|
92 |
+
"description": "Vector representation of the resource",
|
93 |
+
"items": {"bsonType": "double"},
|
94 |
+
},
|
95 |
+
},
|
96 |
+
},
|
97 |
+
},
|
98 |
+
"completion_required": {
|
99 |
+
"bsonType": "bool",
|
100 |
+
"description": "Indicates if completion of pre-class resources is required",
|
101 |
+
},
|
102 |
+
},
|
103 |
+
},
|
104 |
+
"in_class": {
|
105 |
+
"bsonType": "object",
|
106 |
+
"description": "In-class segment data",
|
107 |
+
"properties": {
|
108 |
+
"topics": {
|
109 |
+
"bsonType": "array",
|
110 |
+
"description": "List of topics covered in the session",
|
111 |
+
"items": {"bsonType": "string"},
|
112 |
+
},
|
113 |
+
"quiz": {
|
114 |
+
"bsonType": "object",
|
115 |
+
"description": "Quiz data",
|
116 |
+
"properties": {
|
117 |
+
"title": {
|
118 |
+
"bsonType": "string",
|
119 |
+
"description": "Title of the quiz",
|
120 |
+
},
|
121 |
+
"questions": {
|
122 |
+
"bsonType": "int",
|
123 |
+
"description": "Number of questions in the quiz",
|
124 |
+
},
|
125 |
+
"duration": {
|
126 |
+
"bsonType": "int",
|
127 |
+
"description": "Duration of the quiz in minutes",
|
128 |
+
},
|
129 |
+
},
|
130 |
+
},
|
131 |
+
"polls": {
|
132 |
+
"bsonType": "array",
|
133 |
+
"description": "List of polls conducted during the session",
|
134 |
+
"items": {
|
135 |
+
"bsonType": "object",
|
136 |
+
"required": ["question", "options"],
|
137 |
+
"properties": {
|
138 |
+
"question": {
|
139 |
+
"bsonType": "string",
|
140 |
+
"description": "Poll question",
|
141 |
+
},
|
142 |
+
"options": {
|
143 |
+
"bsonType": "array",
|
144 |
+
"description": "List of poll options",
|
145 |
+
"items": {"bsonType": "string"},
|
146 |
+
},
|
147 |
+
"responses": {
|
148 |
+
"bsonType": "object",
|
149 |
+
"description": "Responses to the poll",
|
150 |
+
"additionalProperties": {"bsonType": "int"},
|
151 |
+
},
|
152 |
+
},
|
153 |
+
},
|
154 |
+
},
|
155 |
+
},
|
156 |
+
},
|
157 |
+
"post_class": {
|
158 |
+
"bsonType": "object",
|
159 |
+
"description": "Post-class segment data",
|
160 |
+
"properties": {
|
161 |
+
"assignments": {
|
162 |
+
"bsonType": "array",
|
163 |
+
"description": "List of assignments",
|
164 |
+
"items": {
|
165 |
+
"bsonType": "object",
|
166 |
+
"required": ["id", "title", "due_date", "status"],
|
167 |
+
"properties": {
|
168 |
+
"id": {
|
169 |
+
"bsonType": ["objectId", "int"],
|
170 |
+
"description": "Assignment ID",
|
171 |
+
},
|
172 |
+
"title": {
|
173 |
+
"bsonType": "string",
|
174 |
+
"description": "Title of the assignment",
|
175 |
+
},
|
176 |
+
"due_date": {
|
177 |
+
"bsonType": "date",
|
178 |
+
"description": "Due date of the assignment",
|
179 |
+
},
|
180 |
+
"status": {
|
181 |
+
"bsonType": "string",
|
182 |
+
"description": "Status of the assignment (e.g., pending, completed)",
|
183 |
+
},
|
184 |
+
"submissions": {
|
185 |
+
"bsonType": "array",
|
186 |
+
"description": "List of submissions",
|
187 |
+
"items": {
|
188 |
+
"bsonType": "object",
|
189 |
+
"properties": {
|
190 |
+
"student_id": {
|
191 |
+
"bsonType": "objectId",
|
192 |
+
"description": "ID of the student who submitted the assignment",
|
193 |
+
},
|
194 |
+
"file_url": {
|
195 |
+
"bsonType": "string",
|
196 |
+
"description": "URL of the submitted file",
|
197 |
+
},
|
198 |
+
"submitted_at": {
|
199 |
+
"bsonType": "date",
|
200 |
+
"description": "Date when the assignment was submitted",
|
201 |
+
},
|
202 |
+
},
|
203 |
+
},
|
204 |
+
},
|
205 |
+
},
|
206 |
+
},
|
207 |
+
}
|
208 |
+
},
|
209 |
+
},
|
210 |
+
},
|
211 |
+
},
|
212 |
+
},
|
213 |
+
},
|
214 |
+
}
|
215 |
+
|
216 |
+
# Update the schema using the collMod command
|
217 |
+
db.command({
|
218 |
+
"collMod": "courses_collection2",
|
219 |
+
"validator": {"$jsonSchema": updated_course_schema}
|
220 |
+
})
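# collMod swaps the validator in place; by default it applies to future inserts and
# updates and does not re-validate documents already in the collection.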
|
221 |
+
|
222 |
+
print("Schema updated successfully!")
|
new_keywords.py
ADDED
@@ -0,0 +1,127 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from pymongo import MongoClient
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
import os
|
6 |
+
import json
|
7 |
+
import re
|
8 |
+
|
9 |
+
# 1. Load environment variables
|
10 |
+
load_dotenv()
|
11 |
+
MONGODB_URI = os.getenv(
|
12 |
+
"MONGODB_UR",
|
13 |
+
"mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
|
14 |
+
)
|
15 |
+
# 2. Create MongoDB connection
|
16 |
+
client = MongoClient(MONGODB_URI)
|
17 |
+
db = client["novascholar_db"]
|
18 |
+
collection = db["research_papers"]
|
19 |
+
|
20 |
+
|
21 |
+
def convert_mixed_columns(df: pd.DataFrame) -> pd.DataFrame:
|
22 |
+
"""
|
23 |
+
Convert any columns that contain lists into comma-separated strings
|
24 |
+
to ensure consistent data types for CSV export.
|
25 |
+
"""
|
26 |
+
for col in df.columns:
|
27 |
+
if any(isinstance(val, list) for val in df[col].dropna()):
|
28 |
+
df[col] = df[col].apply(
|
29 |
+
lambda x: (
|
30 |
+
", ".join(map(str, x))
|
31 |
+
if isinstance(x, list)
|
32 |
+
else (str(x) if pd.notna(x) else "")
|
33 |
+
)
|
34 |
+
)
|
35 |
+
return df
|
36 |
+
|
37 |
+
|
38 |
+
def filter_and_export_collection_to_csv(keywords_list, doc_collection):
|
39 |
+
"""
|
40 |
+
Fetch documents from the specified collection where the 'Keywords' field
|
41 |
+
matches ANY of the keywords in 'keywords_list'. Convert to DataFrame,
|
42 |
+
ensure consistent column types, save to CSV, and return the DataFrame
|
43 |
+
and CSV filename.
|
44 |
+
"""
|
45 |
+
# 3. Retrieve filtered documents from the collection based on 'Keywords' using $in with regex for substring matching
|
46 |
+
regex_keywords = [f".*{keyword}.*" for keyword in keywords_list]
|
47 |
+
docs = list(
|
48 |
+
doc_collection.find(
|
49 |
+
{"Keywords": {"$regex": "|".join(regex_keywords), "$options": "i"}}
|
50 |
+
)
|
51 |
+
)
|
52 |
+
|
53 |
+
# Convert documents to DataFrame
|
54 |
+
df = pd.DataFrame(docs)
|
55 |
+
|
56 |
+
if not df.empty:
|
57 |
+
# 4. Convert mixed columns
|
58 |
+
df = convert_mixed_columns(df)
|
59 |
+
# 5. Export to CSV
|
60 |
+
csv_filename = "filtered_papers_export.csv"
|
61 |
+
df.to_csv(csv_filename, index=False)
|
62 |
+
return df, csv_filename
|
63 |
+
else:
|
64 |
+
# Return an empty DataFrame and None if no documents found
|
65 |
+
return pd.DataFrame(), None
|
66 |
+
|
67 |
+
|
68 |
+
def main():
|
69 |
+
st.title("Filter and Export Papers by Keyword")
|
70 |
+
|
71 |
+
# Let user select the paper type
|
72 |
+
paper_type = st.selectbox(
|
73 |
+
"Select type of research paper:",
|
74 |
+
[
|
75 |
+
"Review Based Paper",
|
76 |
+
"Opinion/Perspective Based Paper",
|
77 |
+
"Empirical Research Paper",
|
78 |
+
"Research Paper (Other)",
|
79 |
+
],
|
80 |
+
)
|
81 |
+
|
82 |
+
# Let user enter the keyword to filter
|
83 |
+
keyword_input = st.text_input(
|
84 |
+
"Enter the exact keyword to filter papers by 'Keywords' field:"
|
85 |
+
)
|
86 |
+
|
87 |
+
# When user clicks button, use the collection for the selected paper type
|
88 |
+
if st.button("Export Filtered Papers to CSV"):
|
89 |
+
with st.spinner("Exporting filtered documents..."):
|
90 |
+
try:
|
91 |
+
# Determine dynamic collection based on paper type
|
92 |
+
collection_name = paper_type.replace(" ", "_").lower()
|
93 |
+
doc_collection = db[collection_name]
|
94 |
+
|
95 |
+
# Split keywords by commas and strip whitespace
|
96 |
+
keywords_list = [
|
97 |
+
kw.strip() for kw in keyword_input.split(",") if kw.strip()
|
98 |
+
]
|
99 |
+
|
100 |
+
if not keywords_list:
|
101 |
+
st.warning("Please enter at least one keyword.")
|
102 |
+
else:
|
103 |
+
df, csv_filename = filter_and_export_collection_to_csv(
|
104 |
+
keywords_list, doc_collection
|
105 |
+
)
|
106 |
+
if not df.empty and csv_filename:
|
107 |
+
st.success(
|
108 |
+
f"Successfully exported filtered papers to {csv_filename}!"
|
109 |
+
)
|
110 |
+
st.download_button(
|
111 |
+
label="Download CSV",
|
112 |
+
data=df.to_csv(index=False).encode("utf-8"),
|
113 |
+
file_name=csv_filename,
|
114 |
+
mime="text/csv",
|
115 |
+
)
|
116 |
+
st.write("Preview of the filtered DataFrame:")
|
117 |
+
st.dataframe(df)
|
118 |
+
else:
|
119 |
+
st.warning(
|
120 |
+
"No matching documents found for the provided keyword(s)."
|
121 |
+
)
|
122 |
+
except Exception as e:
|
123 |
+
st.error(f"Error exporting filtered papers: {str(e)}")
|
124 |
+
|
125 |
+
|
126 |
+
if __name__ == "__main__":
|
127 |
+
main()
|
new_research_paper.py
ADDED
@@ -0,0 +1,103 @@
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import requests
|
4 |
+
import json
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
# Load environment variables
|
9 |
+
load_dotenv()
|
10 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
11 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
12 |
+
|
13 |
+
|
14 |
+
def call_perplexity_api(prompt: str) -> str:
|
15 |
+
"""Call Perplexity AI with a prompt, return the text response if successful."""
|
16 |
+
headers = {
|
17 |
+
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
18 |
+
"Content-Type": "application/json",
|
19 |
+
}
|
20 |
+
|
21 |
+
payload = {
|
22 |
+
"model": "llama-3.1-sonar-small-128k-chat",
|
23 |
+
"messages": [{"role": "user", "content": prompt}],
|
24 |
+
"temperature": 0.3,
|
25 |
+
}
|
26 |
+
|
27 |
+
try:
|
28 |
+
response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
|
29 |
+
response.raise_for_status()
|
30 |
+
return response.json()["choices"][0]["message"]["content"]
|
31 |
+
except Exception as e:
|
32 |
+
st.error(f"API Error: {str(e)}")
|
33 |
+
return ""
|
34 |
+
|
35 |
+
|
36 |
+
def generate_research_paper(df: pd.DataFrame) -> dict:
|
37 |
+
"""
|
38 |
+
For each column in the DataFrame, generate a research paper section (roughly 100-150 words, matching the prompt below)
|
39 |
+
that addresses the data in that column. Return a dict mapping column -> text.
|
40 |
+
"""
|
41 |
+
paper_sections = {}
|
42 |
+
for col in df.columns:
|
43 |
+
# Convert all non-null rows in the column to strings and join them for context
|
44 |
+
col_values = df[col].dropna().astype(str).tolist()
|
45 |
+
# We'll truncate if this is huge
|
46 |
+
sample_text = " | ".join(col_values[:50]) # limit to first 50 rows for brevity
|
47 |
+
prompt = f"""
|
48 |
+
Topic: {col}
|
49 |
+
Data Sample: {sample_text}
|
50 |
+
|
51 |
+
Generate a professional research paper section for the above column.
|
52 |
+
The section should be at least 100 words and at most 150 words,
|
53 |
+
focusing on key insights, challenges, and potential research angles.
|
54 |
+
Integrate the data samples as context for the content.
|
55 |
+
"""
|
56 |
+
section_text = call_perplexity_api(prompt)
|
57 |
+
paper_sections[col] = section_text.strip() if section_text else ""
|
58 |
+
return paper_sections
|
59 |
+
|
60 |
+
|
61 |
+
def format_paper(paper_dict: dict) -> str:
|
62 |
+
"""
|
63 |
+
Format the generated paper into a Markdown string.
|
64 |
+
Each column name is used as a heading, and the text is placed under it.
|
65 |
+
"""
|
66 |
+
md_text = "# Generated Research Paper\n\n"
|
67 |
+
for col, content in paper_dict.items():
|
68 |
+
md_text += f"## {col}\n{content}\n\n"
|
69 |
+
return md_text
|
70 |
+
|
71 |
+
|
72 |
+
def main():
|
73 |
+
st.title("Corpus-based Research Paper Generator")
|
74 |
+
|
75 |
+
uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
|
76 |
+
if uploaded_file:
|
77 |
+
df = pd.read_csv(uploaded_file)
|
78 |
+
st.write("### Preview of Uploaded Data")
|
79 |
+
st.dataframe(df.head())
|
80 |
+
|
81 |
+
if st.button("Generate Research Paper"):
|
82 |
+
st.info("Generating paper based on the columns of your corpus...")
|
83 |
+
with st.spinner("Calling Perplexity AI..."):
|
84 |
+
paper = generate_research_paper(df)
|
85 |
+
if paper:
|
86 |
+
formatted_paper = format_paper(paper)
|
87 |
+
st.success("Research Paper Generated Successfully!")
|
88 |
+
st.write(formatted_paper)
|
89 |
+
|
90 |
+
st.download_button(
|
91 |
+
label="Download Paper as Markdown",
|
92 |
+
data=formatted_paper,
|
93 |
+
file_name="research_paper.md",
|
94 |
+
mime="text/markdown",
|
95 |
+
)
|
96 |
+
else:
|
97 |
+
st.error(
|
98 |
+
"Paper generation failed. Please check Perplexity API key."
|
99 |
+
)
|
100 |
+
|
101 |
+
|
102 |
+
if __name__ == "__main__":
|
103 |
+
main()
|
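A minimal offline sketch (editorial, not part of the commit) of how generate_research_paper and format_paper compose; it stubs out call_perplexity_api so no PERPLEXITY_API_KEY or network access is needed, and the column names are made up:

import pandas as pd
import new_research_paper as nrp

# Stub the API call so the example runs offline (assumption: we only want to see the plumbing).
nrp.call_perplexity_api = lambda prompt: "Placeholder section text."

df = pd.DataFrame({"Title": ["Paper A", "Paper B"], "Abstract": ["...", "..."]})
sections = nrp.generate_research_paper(df)   # one generated section per column
print(nrp.format_paper(sections))            # Markdown with '## Title' and '## Abstract' headings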
poll_db_operations.py
ADDED
@@ -0,0 +1,70 @@
from pymongo import MongoClient
from datetime import datetime
from bson import ObjectId
from dotenv import load_dotenv
import os

load_dotenv()
MONGO_URI = os.getenv('MONGO_URI')

class PollDatabase:
    def __init__(self):
        self.client = MongoClient(MONGO_URI)
        self.db = self.client["novascholar_db"]

    def create_poll(self, course_id, session_id, question, options, faculty_id):
        """Create a new poll"""
        poll = {
            "course_id": course_id,
            "session_id": session_id,
            "faculty_id": faculty_id,
            "question": question,
            "options": options,
            "status": "active",
            "created_at": datetime.now(),
            "responses": {option: 0 for option in options}
        }
        return self.db.polls.insert_one(poll)

    def get_active_polls(self, session_id):
        """Get all active polls for a session"""
        return list(self.db.polls.find({
            "session_id": session_id,
            "status": "active"
        }))

    def submit_response(self, poll_id, student_id, selected_option):
        """Submit a student's response to a poll"""
        try:
            # Record individual response
            response = {
                "poll_id": poll_id,
                "student_id": student_id,
                "selected_option": selected_option,
                "submitted_at": datetime.now()
            }
            self.db.poll_responses.insert_one(response)

            # Update aggregated results
            self.db.polls.update_one(
                {"_id": ObjectId(poll_id)},
                {"$inc": {f"responses.{selected_option}": 1}}
            )
            return True, "Vote recorded successfully"

        except Exception as e:
            if "duplicate key error" in str(e):
                return False, "You have already voted in this poll"
            return False, f"Error recording vote: {str(e)}"

    def close_poll(self, poll_id):
        """Close a poll"""
        return self.db.polls.update_one(
            {"_id": ObjectId(poll_id)},
            {"$set": {"status": "closed"}}
        )

    def get_poll_analytics(self, poll_id):
        """Get detailed analytics for a poll"""
        poll = self.db.polls.find_one({"_id": ObjectId(poll_id)})
        responses = self.db.poll_responses.find({"poll_id": ObjectId(poll_id)})
        return poll, list(responses)
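A short usage sketch for PollDatabase (editorial, not part of the commit); the course, session, and user IDs are invented, and it assumes MONGO_URI points at a reachable MongoDB instance with the indexes from poll_db_setup.py already created:

from poll_db_operations import PollDatabase

db = PollDatabase()
result = db.create_poll("CS101", "session_1", "Which topic should we revisit?",
                        ["Recursion", "Dynamic programming"], faculty_id="F001")
poll_id = str(result.inserted_id)

ok, message = db.submit_response(poll_id, student_id="S042", selected_option="Recursion")
print(ok, message)  # a second vote by S042 on this poll would trip the unique index and be rejected

poll, responses = db.get_poll_analytics(poll_id)
print(poll["responses"])
db.close_poll(poll_id)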
poll_db_setup.py
ADDED
@@ -0,0 +1,35 @@
from pymongo import MongoClient
from datetime import datetime
from dotenv import load_dotenv
import os

load_dotenv()
MONGO_URI = os.getenv('MONGO_URI')

def setup_mongodb():
    """Initialize MongoDB connection and create collections with indexes"""
    client = MongoClient(MONGO_URI)
    db = client["novascholar_db"]

    # Create indexes for polls collection
    db.polls.create_index([("session_id", 1), ("status", 1)])
    db.polls.create_index([("course_id", 1)])

    # Create unique index for poll_responses to prevent duplicate votes
    db.poll_responses.create_index(
        [("poll_id", 1), ("student_id", 1)],
        unique=True
    )

    return "Database setup completed successfully"

def print_all_polls():
    """Print all polls in the database"""
    client = MongoClient(MONGO_URI)
    db = client["novascholar_db"]

    polls = db.polls.find()
    for poll in polls:
        print(poll)

if __name__ == "__main__":
    print_all_polls()
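Editorial note (not part of the commit): the unique compound index on (poll_id, student_id) is what makes the duplicate-vote branch in PollDatabase.submit_response fire, so setup_mongodb() is meant to run once before polls go live; a minimal sketch, assuming MONGO_URI is set in .env:

from poll_db_setup import setup_mongodb

print(setup_mongodb())  # "Database setup completed successfully"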
pre_class_analytics2.py
ADDED
@@ -0,0 +1,759 @@
import json
import typing_extensions as typing
import google.generativeai as genai
from typing import List, Dict, Any
import numpy as np
from collections import defaultdict

from dotenv import load_dotenv
import os
import pymongo
from pymongo import MongoClient

load_dotenv()
GEMINI_API_KEY = os.getenv('GEMINI_KEY')

class EngagementMetrics(typing.TypedDict):
    participation_level: str  # "high" | "medium" | "low"
    question_quality: str  # "advanced" | "intermediate" | "basic"
    concept_understanding: str  # "strong" | "moderate" | "needs_improvement"

class StudentInsight(typing.TypedDict):
    student_id: str
    performance_level: str  # "high_performer" | "average" | "at_risk"
    struggling_topics: list[str]
    engagement_metrics: EngagementMetrics

class TopicInsight(typing.TypedDict):
    topic: str
    difficulty_level: float  # 0 to 1
    student_count: int
    common_issues: list[str]
    key_misconceptions: list[str]

class RecommendedAction(typing.TypedDict):
    action: str
    priority: str  # "high" | "medium" | "low"
    target_group: str  # "all_students" | "specific_students" | "faculty"
    reasoning: str
    expected_impact: str

class ClassDistribution(typing.TypedDict):
    high_performers: float
    average_performers: float
    at_risk: float

class CourseHealth(typing.TypedDict):
    overall_engagement: float  # 0 to 1
    critical_topics: list[str]
    class_distribution: ClassDistribution

class InterventionMetrics(typing.TypedDict):
    immediate_attention_needed: list[str]  # student_ids
    monitoring_required: list[str]  # student_ids

class AnalyticsResponse(typing.TypedDict):
    topic_insights: list[TopicInsight]
    student_insights: list[StudentInsight]
    recommended_actions: list[RecommendedAction]
    course_health: CourseHealth
    intervention_metrics: InterventionMetrics


class NovaScholarAnalytics:
    def __init__(self, model_name: str = "gemini-1.5-flash"):
        genai.configure(api_key=GEMINI_API_KEY)
        self.model = genai.GenerativeModel(model_name)

    def _create_analytics_prompt(self, chat_histories: List[Dict], all_topics: List[str]) -> str:
        """Creates a structured prompt for Gemini to analyze chat histories."""
        # Prompt 1:
        # return f"""Analyze these student chat histories for a university course and provide detailed analytics.
        #
        # Context:
        # - These are pre-class chat interactions between students and an AI tutor
        # - Topics covered: {', '.join(all_topics)}
        #
        # Chat histories: {json.dumps(chat_histories, indent=2)}
        #
        # Return the analysis in JSON format matching this exact schema:
        # {AnalyticsResponse.__annotations__}
        #
        # Ensure all numeric values are between 0 and 1 (accuracy upto 3 decimal places) where applicable.
        #
        # Important analysis guidelines:
        # 1. Identify topics where students show confusion or ask multiple follow-up questions
        # 2. Look for patterns in question types and complexity
        # 3. Analyze response understanding based on follow-up questions
        # 4. Consider both explicit and implicit signs of difficulty
        # 5. Focus on concept relationships and prerequisite understanding"""

        # Prompt 2:
        # return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
        #
        # Context:
        # - Chat histories: {json.dumps(chat_histories, indent=2)}
        # - These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
        # - Topics covered: {', '.join(all_topics)}.
        #
        # Your task is to extract key insights that will help faculty address challenges effectively and enhance learning outcomes.
        #
        # Output Format:
        # 1. Topics where students face significant difficulties:
        #    - Provide a ranked list of topics where the majority of students are struggling, based on the frequency and nature of their questions or misconceptions.
        #    - Include the percentage of students who found each topic challenging.
        #
        # 2. AI-recommended actions for faculty:
        #    - Suggest actionable steps to address the difficulties identified in each critical topic.
        #    - Specify the priority of each action (high, medium, low) based on the urgency and impact.
        #    - Explain the reasoning behind each recommendation and its expected impact on student outcomes.
        #
        # 3. Student-specific analytics (focusing on at-risk students):
        #    - Identify students categorized as "at-risk" based on their engagement levels, question complexity, and recurring struggles.
        #    - For each at-risk student, list their top 3 struggling topics and their engagement metrics (participation level, concept understanding).
        #    - Provide personalized recommendations for improving their understanding.
        #
        # Guidelines for Analysis:
        # - Focus on actionable and concise insights rather than exhaustive details.
        # - Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
        # - Prioritize topics with higher difficulty scores or more students struggling.
        # - Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
        #
        # The response must be well-structured, concise, and highly actionable for faculty to implement improvements effectively."""

        # Prompt 3:
        return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
        Context:
        - Chat histories: {json.dumps(chat_histories, indent=2)}
        - These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
        - Topics covered: {', '.join(all_topics)}.

        Your task is to provide detailed analytics that will help faculty address challenges effectively and enhance learning outcomes.

        Output Format (strictly follow this JSON structure):
        {{
            "topic_wise_insights": [
                {{
                    "topic": "<string>",
                    "struggling_percentage": <number between 0 and 1>,
                    "key_issues": ["<string>", "<string>", ...],
                    "key_misconceptions": ["<string>", "<string>", ...],
                    "recommended_actions": {{
                        "description": "<string>",
                        "priority": "high|medium|low",
                        "expected_outcome": "<string>"
                    }}
                }}
            ],
            "ai_recommended_actions": [
                {{
                    "action": "<string>",
                    "priority": "high|medium|low",
                    "reasoning": "<string>",
                    "expected_outcome": "<string>",
                    "pedagogy_recommendations": {{
                        "methods": ["<string>", "<string>", ...],
                        "resources": ["<string>", "<string>", ...],
                        "expected_impact": "<string>"
                    }}
                }}
            ],
            "student_analytics": [
                {{
                    "student_id": "<string>",
                    "engagement_metrics": {{
                        "participation_level": <number between 0 and 1>,
                        "concept_understanding": "strong|moderate|needs_improvement",
                        "question_quality": "advanced|intermediate|basic"
                    }},
                    "struggling_topics": ["<string>", "<string>", ...],
                    "personalized_recommendation": "<string>"
                }}
            ]
        }}

        Guidelines for Analysis:
        - Focus on actionable and concise insights rather than exhaustive details.
        - Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
        - Prioritize topics with higher difficulty scores or more students struggling.
        - Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
        - Make sure to include All** students in the analysis, not just a subset.
        - for the ai_recommended_actions:
          - Prioritize pedagogy recommendations for critical topics with the high difficulty scores or struggling percentages.
          - For each action:
            - Include specific teaching methods (e.g., interactive discussions or quizzes, problem-based learning, practical examples etc).
            - Recommend supporting resources (e.g., videos, handouts, simulations).
            - Provide reasoning for the recommendation and the expected outcomes for student learning.
          - Example:
            - **Action:** Conduct an interactive problem-solving session on "<Topic Name>".
            - **Reasoning:** Students showed difficulty in applying concepts to practical problems.
            - **Expected Outcome:** Improved practical understanding and application of the topic.
            - **Pedagogy Recommendations:**
              - **Methods:** Group discussions, real-world case studies.
              - **Resources:** Online interactive tools, relevant case studies, video walkthroughs.
              - **Expected Impact:** Enhance conceptual clarity by 40% and practical application by 30%.

        The response must adhere strictly to the above JSON structure, with all fields populated appropriately."""

    def _calculate_class_distribution(self, analytics: Dict) -> Dict:
        """Calculate the distribution of students across performance levels."""
        try:
            total_students = len(analytics.get("student_insights", []))
            if total_students == 0:
                return {
                    "high_performers": 0,
                    "average_performers": 0,
                    "at_risk": 0
                }

            distribution = defaultdict(int)

            for student in analytics.get("student_insights", []):
                performance_level = student.get("performance_level", "average")
                # Map performance levels to our three categories
                if performance_level in ["excellent", "high", "high_performer"]:
                    distribution["high_performers"] += 1
                elif performance_level in ["struggling", "low", "at_risk"]:
                    distribution["at_risk"] += 1
                else:
                    distribution["average_performers"] += 1

            # Convert to percentages
            return {
                level: count/total_students
                for level, count in distribution.items()
            }
        except Exception as e:
            print(f"Error calculating class distribution: {str(e)}")
            return {
                "high_performers": 0,
                "average_performers": 0,
                "at_risk": 0
            }

    def _identify_urgent_cases(self, analytics: Dict) -> List[str]:
        """Identify students needing immediate attention."""
        try:
            urgent_cases = []
            for student in analytics.get("student_insights", []):
                student_id = student.get("student_id")
                if not student_id:
                    continue

                # Check multiple risk factors
                risk_factors = 0

                # Factor 1: Performance level
                if student.get("performance_level") in ["struggling", "at_risk", "low"]:
                    risk_factors += 1

                # Factor 2: Number of struggling topics
                if len(student.get("struggling_topics", [])) >= 2:
                    risk_factors += 1

                # Factor 3: Engagement metrics
                engagement = student.get("engagement_metrics", {})
                if (engagement.get("participation_level") == "low" or
                    engagement.get("concept_understanding") == "needs_improvement"):
                    risk_factors += 1

                # If student has multiple risk factors, add to urgent cases
                if risk_factors >= 2:
                    urgent_cases.append(student_id)

            return urgent_cases
        except Exception as e:
            print(f"Error identifying urgent cases: {str(e)}")
            return []

    def _identify_monitoring_cases(self, analytics: Dict) -> List[str]:
        """Identify students who need monitoring but aren't urgent cases."""
        try:
            monitoring_cases = []
            urgent_cases = set(self._identify_urgent_cases(analytics))

            for student in analytics.get("student_insights", []):
                student_id = student.get("student_id")
                if not student_id or student_id in urgent_cases:
                    continue

                # Check monitoring criteria
                monitoring_needed = False

                # Criterion 1: Has some struggling topics but not enough for urgent
                if len(student.get("struggling_topics", [])) == 1:
                    monitoring_needed = True

                # Criterion 2: Medium-low engagement
                engagement = student.get("engagement_metrics", {})
                if engagement.get("participation_level") == "medium":
                    monitoring_needed = True

                # Criterion 3: Recent performance decline
                if student.get("performance_level") == "average":
                    monitoring_needed = True

                if monitoring_needed:
                    monitoring_cases.append(student_id)

            return monitoring_cases
        except Exception as e:
            print(f"Error identifying monitoring cases: {str(e)}")
            return []

    def _identify_critical_topics(self, analytics: Dict) -> List[str]:
        """
        Identify critical topics that need attention based on multiple factors.
        Returns a list of topic names that are considered critical.
        """
        try:
            critical_topics = []
            topics = analytics.get("topic_insights", [])

            for topic in topics:
                if not isinstance(topic, dict):
                    continue

                # Initialize score for topic criticality
                critical_score = 0

                # Factor 1: High difficulty level
                difficulty_level = topic.get("difficulty_level", 0)
                if difficulty_level > 0.7:
                    critical_score += 2
                elif difficulty_level > 0.5:
                    critical_score += 1

                # Factor 2: Number of students struggling
                student_count = topic.get("student_count", 0)
                total_students = len(analytics.get("student_insights", []))
                if total_students > 0:
                    struggle_ratio = student_count / total_students
                    if struggle_ratio > 0.5:
                        critical_score += 2
                    elif struggle_ratio > 0.3:
                        critical_score += 1

                # Factor 3: Number of common issues
                if len(topic.get("common_issues", [])) > 2:
                    critical_score += 1

                # Factor 4: Number of key misconceptions
                if len(topic.get("key_misconceptions", [])) > 1:
                    critical_score += 1

                # If topic exceeds threshold, mark as critical
                if critical_score >= 3:
                    critical_topics.append(topic.get("topic", "Unknown Topic"))

            return critical_topics

        except Exception as e:
            print(f"Error identifying critical topics: {str(e)}")
            return []

    def _calculate_engagement(self, analytics: Dict) -> Dict:
        """
        Calculate detailed engagement metrics across all students.
        Returns a dictionary with engagement statistics.
        """
        try:
            total_students = len(analytics.get("student_insights", []))
            if total_students == 0:
                return {
                    "total_students": 0,
                    "overall_score": 0,
                    "engagement_distribution": {
                        "high": 0,
                        "medium": 0,
                        "low": 0
                    },
                    "participation_metrics": {
                        "average_topics_per_student": 0,
                        "active_participants": 0
                    }
                }

            engagement_levels = defaultdict(int)
            total_topics_engaged = 0
            active_participants = 0

            for student in analytics.get("student_insights", []):
                # Get engagement metrics
                metrics = student.get("engagement_metrics", {})

                # Calculate participation level
                participation = metrics.get("participation_level", "low").lower()
                engagement_levels[participation] += 1

                # Count topics student is engaged with
                topics_count = len(student.get("struggling_topics", []))
                total_topics_engaged += topics_count

                # Count active participants (students engaging with any topics)
                if topics_count > 0:
                    active_participants += 1

            # Calculate overall engagement score (0-1)
            weighted_score = (
                (engagement_levels["high"] * 1.0 +
                engagement_levels["medium"] * 0.6 +
                engagement_levels["low"] * 0.2) / total_students
            )

            return {
                "total_students": total_students,
                "overall_score": round(weighted_score, 2),
                "engagement_distribution": {
                    level: count/total_students
                    for level, count in engagement_levels.items()
                },
                "participation_metrics": {
                    "average_topics_per_student": round(total_topics_engaged / total_students, 2),
                    "active_participants_ratio": round(active_participants / total_students, 2)
                }
            }

        except Exception as e:
            print(f"Error calculating engagement: {str(e)}")
            return {
                "total_students": 0,
                "overall_score": 0,
                "engagement_distribution": {
                    "high": 0,
                    "medium": 0,
                    "low": 0
                },
                "participation_metrics": {
                    "average_topics_per_student": 0,
                    "active_participants_ratio": 0
                }
            }

    def _process_gemini_response(self, response: str) -> Dict:
        """Process and validate Gemini's response."""
        # try:
        #     analytics = json.loads(response)
        #     return self._enrich_analytics(analytics)
        # except json.JSONDecodeError as e:
        #     print(f"Error decoding Gemini response: {e}")
        #     return self._fallback_analytics()
        try:
            # Parse JSON response
            analytics = json.loads(response)

            # Validate required fields exist
            required_fields = {
                "topic_insights": [],
                "student_insights": [],
                "recommended_actions": []
            }

            # Ensure all required fields exist with default values
            for field, default_value in required_fields.items():
                if field not in analytics or not analytics[field]:
                    analytics[field] = default_value

            # Now enrich the validated analytics
            return self._enrich_analytics(analytics)

        except (json.JSONDecodeError, KeyError, TypeError) as e:
            print(f"Error processing Gemini response: {str(e)}")
            print(f"Raw response: {response}")
            return self._fallback_analytics()

    def _enrich_analytics(self, analytics: Dict) -> Dict:
        """Add derived insights and metrics to the analytics."""
        # Add overall course health metrics
        analytics["course_health"] = {
            "overall_engagement": self._calculate_engagement(analytics),
            "critical_topics": self._identify_critical_topics(analytics),
            "class_distribution": self._calculate_class_distribution(analytics)
        }

        # Add intervention urgency scores
        analytics["intervention_metrics"] = {
            "immediate_attention_needed": self._identify_urgent_cases(analytics),
            "monitoring_required": self._identify_monitoring_cases(analytics)
        }

        return analytics

    def _calculate_engagement(self, analytics: Dict) -> Dict:
        # """Calculate overall engagement metrics."""
        # total_students = len(analytics["student_insights"])
        # engagement_levels = defaultdict(int)
        #
        # for student in analytics["student_insights"]:
        #     engagement_levels[student["engagement_metrics"]["participation_level"]] += 1
        #
        # return {
        #     "total_students": total_students,
        #     "engagement_distribution": {
        #         level: count/total_students
        #         for level, count in engagement_levels.items()
        #     }
        # }
        """Calculate overall engagement metrics with defensive programming."""
        try:
            total_students = len(analytics.get("student_insights", []))
            if total_students == 0:
                return {
                    "total_students": 0,
                    "engagement_distribution": {
                        "high": 0,
                        "medium": 0,
                        "low": 0
                    }
                }

            engagement_levels = defaultdict(int)

            for student in analytics.get("student_insights", []):
                metrics = student.get("engagement_metrics", {})
                level = metrics.get("participation_level", "low")
                engagement_levels[level] += 1

            return {
                "total_students": total_students,
                "engagement_distribution": {
                    level: count/total_students
                    for level, count in engagement_levels.items()
                }
            }
        except Exception as e:
            print(f"Error calculating engagement: {str(e)}")
            return {
                "total_students": 0,
                "engagement_distribution": {
                    "high": 0,
                    "medium": 0,
                    "low": 0
                }
            }

    def _identify_critical_topics(self, analytics: Dict) -> List[Dict]:
        # """Identify topics needing immediate attention."""
        # return [
        #     topic for topic in analytics["topic_insights"]
        #     if topic["difficulty_level"] > 0.7 or
        #     len(topic["common_issues"]) > 2
        # ]
        """Identify topics needing immediate attention with defensive programming."""
        try:
            return [
                topic for topic in analytics.get("topic_insights", [])
                if topic.get("difficulty_level", 0) > 0.7 or
                len(topic.get("common_issues", [])) > 2
            ]
        except Exception as e:
            print(f"Error identifying critical topics: {str(e)}")
            return []

    def generate_analytics(self, chat_histories: List[Dict], all_topics: List[str]) -> Dict:
        # Method 1: (caused key 'student_insights' error):
        # """Main method to generate analytics from chat histories."""
        # # Preprocess chat histories
        # processed_histories = self._preprocess_chat_histories(chat_histories)
        #
        # # Create and send prompt to Gemini
        # prompt = self._create_analytics_prompt(processed_histories, all_topics)
        # response = self.model.generate_content(
        #     prompt,
        #     generation_config=genai.GenerationConfig(
        #         response_mime_type="application/json",
        #         response_schema=AnalyticsResponse
        #     )
        # )
        #
        # # # Process and enrich analytics
        # # analytics = self._process_gemini_response(response.text)
        # # return analytics
        # # Process, validate, and enrich the response
        # analytics = self._process_gemini_response(response.text)
        #
        # # Then cast it to satisfy the type checker
        # return typing.cast(AnalyticsResponse, analytics)

        # Method 2 (possible fix):
        # """Main method to generate analytics with better error handling."""
        # try:
        #     processed_histories = self._preprocess_chat_histories(chat_histories)
        #     prompt = self._create_analytics_prompt(processed_histories, all_topics)
        #
        #     response = self.model.generate_content(
        #         prompt,
        #         generation_config=genai.GenerationConfig(
        #             response_mime_type="application/json",
        #             temperature=0.15
        #             # response_schema=AnalyticsResponse
        #         )
        #     )
        #
        #     if not response.text:
        #         print("Empty response from Gemini")
        #         return self._fallback_analytics()
        #
        #     # analytics = self._process_gemini_response(response.text)
        #     # return typing.cast(AnalyticsResponse, analytics)
        #     # return response.text;
        #     analytics = json.loads(response.text)
        #     return analytics
        #
        # except Exception as e:
        #     print(f"Error generating analytics: {str(e)}")
        #     return self._fallback_analytics()

        # Debugging code:
        """Main method to generate analytics with better error handling."""
        try:
            # Debug print for input validation
            print("Input validation:")
            print(f"Chat histories: {len(chat_histories)} entries")
            print(f"Topics: {all_topics}")

            if not chat_histories or not all_topics:
                print("Missing required input data")
                return self._fallback_analytics()

            # Debug the preprocessing step
            try:
                processed_histories = self._preprocess_chat_histories(chat_histories)
                print("Successfully preprocessed chat histories")
            except Exception as preprocess_error:
                print(f"Error in preprocessing: {str(preprocess_error)}")
                return self._fallback_analytics()

            # Debug the prompt creation
            try:
                prompt = self._create_analytics_prompt(processed_histories, all_topics)
                print("Successfully created prompt")
                print("Prompt preview:", prompt[:200] + "...")  # Print first 200 chars
            except Exception as prompt_error:
                print(f"Error in prompt creation: {str(prompt_error)}")
                return self._fallback_analytics()

            # Rest of the function remains the same
            response = self.model.generate_content(
                prompt,
                generation_config=genai.GenerationConfig(
                    response_mime_type="application/json",
                    temperature=0.15
                )
            )

            if not response.text:
                print("Empty response from Gemini")
                return self._fallback_analytics()

            analytics = json.loads(response.text)
            return analytics

        except Exception as e:
            print(f"Error generating analytics: {str(e)}")
            print(f"Error type: {type(e)}")
            import traceback
            print("Full traceback:", traceback.format_exc())
            return self._fallback_analytics()

    def _preprocess_chat_histories(self, chat_histories: List[Dict]) -> List[Dict]:
        # """Preprocess chat histories to focus on relevant information."""
        # processed = []
        #
        # for chat in chat_histories:
        #     print(str(chat["user_id"]))
        #     processed_chat = {
        #         "user_id": str(chat["user_id"]),
        #         "messages": [
        #             {
        #                 "prompt": msg["prompt"],
        #                 "response": msg["response"]
        #             }
        #             for msg in chat["messages"]
        #         ]
        #     }
        #     processed.append(processed_chat)
        #
        # return processed

        # Code 2:
        """Preprocess chat histories to focus on relevant information."""
        processed = []

        for chat in chat_histories:
            # Convert ObjectId to string if it's an ObjectId
            user_id = str(chat["user_id"]["$oid"]) if isinstance(chat["user_id"], dict) and "$oid" in chat["user_id"] else str(chat["user_id"])

            try:
                processed_chat = {
                    "user_id": user_id,
                    "messages": [
                        {
                            "prompt": msg["prompt"],
                            "response": msg["response"]
                        }
                        for msg in chat["messages"]
                    ]
                }
                processed.append(processed_chat)
                print(f"Successfully processed chat for user: {user_id}")
            except Exception as e:
                print(f"Error processing chat for user: {user_id}")
                print(f"Error details: {str(e)}")
                continue

        return processed

    def _fallback_analytics(self) -> Dict:
        # """Provide basic analytics in case of LLM processing failure."""
        # return {
        #     "topic_insights": [],
        #     "student_insights": [],
        #     "recommended_actions": [
        #         {
        #             "action": "Review analytics generation process",
        #             "priority": "high",
        #             "target_group": "system_administrators",
        #             "reasoning": "Analytics generation failed",
        #             "expected_impact": "Restore analytics functionality"
        #         }
        #     ]
        # }
        """Provide comprehensive fallback analytics that match our schema."""
        return {
            "topic_insights": [],
            "student_insights": [],
            "recommended_actions": [
                {
                    "action": "Review analytics generation process",
                    "priority": "high",
                    "target_group": "system_administrators",
                    "reasoning": "Analytics generation failed",
                    "expected_impact": "Restore analytics functionality"
                }
            ],
            "course_health": {
                "overall_engagement": 0,
                "critical_topics": [],
                "class_distribution": {
                    "high_performers": 0,
                    "average_performers": 0,
                    "at_risk": 0
                }
            },
            "intervention_metrics": {
                "immediate_attention_needed": [],
                "monitoring_required": []
            }
        }

# if __name__ == "__main__":
#     # Example usage
#     analytics_generator = NovaScholarAnalytics()
#     analytics = analytics_generator.generate_analytics(chat_histories, all_topics)
#     print(json.dumps(analytics, indent=2))
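A minimal invocation sketch for NovaScholarAnalytics (editorial, not part of the commit), mirroring the commented-out __main__ block above; the chat history and topic names are invented and a valid GEMINI_KEY in .env is assumed:

import json
from pre_class_analytics2 import NovaScholarAnalytics

# Hypothetical pre-class chat data in the shape _preprocess_chat_histories expects.
chat_histories = [
    {
        "user_id": {"$oid": "64f0c0ffee0000000000a001"},
        "messages": [
            {"prompt": "Why does gradient descent need a learning rate?",
             "response": "It scales each update step..."},
        ],
    }
]
all_topics = ["Gradient Descent", "Regularization"]

analytics = NovaScholarAnalytics().generate_analytics(chat_histories, all_topics)
print(json.dumps(analytics, indent=2))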
pre_class_analytics4.py
ADDED
@@ -0,0 +1,592 @@
import pandas as pd
import numpy as np
from datetime import datetime
from typing import List, Dict, Any, Tuple
import spacy
from collections import Counter, defaultdict
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from textblob import TextBlob
import networkx as nx
from scipy import stats
import logging
import json
from dataclasses import dataclass
from enum import Enum

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class TopicDifficulty(Enum):
    EASY = "easy"
    MODERATE = "moderate"
    DIFFICULT = "difficult"
    VERY_DIFFICULT = "very_difficult"


@dataclass
class QuestionMetrics:
    complexity_score: float
    follow_up_count: int
    clarification_count: int
    time_spent: float
    sentiment_score: float

@dataclass
class TopicInsights:
    difficulty_level: TopicDifficulty
    common_confusion_points: List[str]
    question_patterns: List[str]
    time_distribution: Dict[str, float]
    engagement_metrics: Dict[str, float]
    recommended_focus_areas: List[str]

    def to_dict(self):
        return {
            "difficulty_level": self.difficulty_level.value,  # Convert enum to its value
            "common_confusion_points": self.common_confusion_points,
            "question_patterns": self.question_patterns,
            "time_distribution": {str(k): v for k, v in self.time_distribution.items()},
            "engagement_metrics": self.engagement_metrics,
            "recommended_focus_areas": self.recommended_focus_areas,
        }

class PreClassAnalytics:
    def __init__(self, nlp_model: str = "en_core_web_lg"):
        """Initialize the analytics system with necessary components."""
        self.nlp = spacy.load(nlp_model)
        self.question_indicators = {
            "what", "why", "how", "when", "where", "which", "who",
            "whose", "whom", "can", "could", "would", "will", "explain"
        }
        self.confusion_indicators = {
            "confused", "don't understand", "unclear", "not clear",
            "stuck", "difficult", "hard", "help", "explain again"
        }
        self.follow_up_indicators = {
            "also", "another", "additionally", "furthermore", "moreover",
            "besides", "related", "similarly", "again"
        }

    def preprocess_chat_history(self, chat_history: List[Dict]) -> pd.DataFrame:
        """Convert chat history to DataFrame with enhanced features."""
        messages = []
        for chat in chat_history:
            user_id = chat['user_id']['$oid']
            for msg in chat['messages']:
                try:
                    # Ensure the timestamp is in the correct format
                    if isinstance(msg['timestamp'], dict) and '$date' in msg['timestamp']:
                        timestamp = pd.to_datetime(msg['timestamp']['$date'])
                    elif isinstance(msg['timestamp'], str):
                        timestamp = pd.to_datetime(msg['timestamp'])
                    else:
                        raise ValueError("Invalid timestamp format")
                except Exception as e:
                    print(f"Error parsing timestamp: {msg['timestamp']}, error: {e}")
                    timestamp = pd.NaT  # Use NaT (Not a Time) for invalid timestamps

                messages.append({
                    'user_id': user_id,
                    'timestamp': timestamp,
                    'prompt': msg['prompt'],
                    'response': msg['response'],
                    'is_question': any(q in msg['prompt'].lower() for q in self.question_indicators),
                    'shows_confusion': any(c in msg['prompt'].lower() for c in self.confusion_indicators),
                    'is_followup': any(f in msg['prompt'].lower() for f in self.follow_up_indicators)
                })

        df = pd.DataFrame(messages)
        df['sentiment'] = df['prompt'].apply(lambda x: TextBlob(x).sentiment.polarity)
        return df

    def extract_topic_hierarchies(self, df: pd.DataFrame) -> Dict[str, List[str]]:
        """Extract hierarchical topic relationships from conversations."""
        topic_hierarchy = defaultdict(list)

        for _, row in df.iterrows():
            doc = self.nlp(row['prompt'])

            # Extract main topics and subtopics using noun chunks and dependencies
            main_topics = []
            subtopics = []

            for chunk in doc.noun_chunks:
                if chunk.root.dep_ in ('nsubj', 'dobj'):
                    main_topics.append(chunk.text.lower())
                else:
                    subtopics.append(chunk.text.lower())

            # Build hierarchy
            for main_topic in main_topics:
                topic_hierarchy[main_topic].extend(subtopics)

        # Clean and deduplicate
        return {k: list(set(v)) for k, v in topic_hierarchy.items()}

    def analyze_topic_difficulty(self, df: pd.DataFrame, topic: str) -> TopicDifficulty:
        """Determine topic difficulty based on various metrics."""
        topic_msgs = df[df['prompt'].str.contains(topic, case=False)]

        # Calculate difficulty indicators
        confusion_rate = topic_msgs['shows_confusion'].mean()
        question_rate = topic_msgs['is_question'].mean()
        follow_up_rate = topic_msgs['is_followup'].mean()
        avg_sentiment = topic_msgs['sentiment'].mean()

        # Calculate composite difficulty score
        difficulty_score = (
            confusion_rate * 0.4 +
            question_rate * 0.3 +
            follow_up_rate * 0.2 +
            (1 - (avg_sentiment + 1) / 2) * 0.1
        )

        # Map score to difficulty level
        if difficulty_score < 0.3:
            return TopicDifficulty.EASY
        elif difficulty_score < 0.5:
            return TopicDifficulty.MODERATE
        elif difficulty_score < 0.7:
            return TopicDifficulty.DIFFICULT
        else:
            return TopicDifficulty.VERY_DIFFICULT

    def identify_confusion_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
        """Identify common patterns in student confusion."""
        confused_msgs = df[
            (df['prompt'].str.contains(topic, case=False)) &
            (df['shows_confusion'])
        ]['prompt']

        patterns = []
        for msg in confused_msgs:
            doc = self.nlp(msg)

            # Extract key phrases around confusion indicators
            for sent in doc.sents:
                for token in sent:
                    if token.text.lower() in self.confusion_indicators:
                        # Get context window around confusion indicator
                        context = sent.text
                        patterns.append(context)

        # Group similar patterns
        if patterns:
            vectorizer = TfidfVectorizer(ngram_range=(1, 3))
            tfidf_matrix = vectorizer.fit_transform(patterns)
            similarity_matrix = cosine_similarity(tfidf_matrix)

            # Cluster similar patterns
            G = nx.Graph()
            for i in range(len(patterns)):
                for j in range(i + 1, len(patterns)):
                    if similarity_matrix[i][j] > 0.5:  # Similarity threshold
                        G.add_edge(i, j)

            # Extract representative patterns from each cluster
            clusters = list(nx.connected_components(G))
            return [patterns[min(cluster)] for cluster in clusters]

        return []

    def analyze_question_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
        """Analyze patterns in student questions about the topic."""
        topic_questions = df[
            (df['prompt'].str.contains(topic, case=False)) &
            (df['is_question'])
        ]['prompt']

        question_types = defaultdict(list)
        for question in topic_questions:
            doc = self.nlp(question)

            # Categorize questions
            if any(token.text.lower() in {"what", "define", "explain"} for token in doc):
                question_types["conceptual"].append(question)
            elif any(token.text.lower() in {"how", "steps", "process"} for token in doc):
                question_types["procedural"].append(question)
            elif any(token.text.lower() in {"why", "reason", "because"} for token in doc):
                question_types["reasoning"].append(question)
            else:
                question_types["other"].append(question)

        # Extract patterns from each category
        patterns = []
        for category, questions in question_types.items():
            if questions:
                vectorizer = TfidfVectorizer(ngram_range=(1, 3))
                tfidf_matrix = vectorizer.fit_transform(questions)

                # Get most representative questions
                feature_array = np.mean(tfidf_matrix.toarray(), axis=0)
                tfidf_sorting = np.argsort(feature_array)[::-1]
                features = vectorizer.get_feature_names_out()

                patterns.append(f"{category}: {' '.join(features[tfidf_sorting[:3]])}")

        return patterns

    def analyze_time_distribution(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
        """Analyze time spent on different aspects of the topic."""
        topic_msgs = df[df['prompt'].str.contains(topic, case=False)].copy()
        if len(topic_msgs) < 2:
            return {}

        topic_msgs['time_diff'] = topic_msgs['timestamp'].diff()

        # Calculate time distribution
        distribution = {
            'total_time': topic_msgs['time_diff'].sum().total_seconds() / 60,
            'avg_time_per_message': topic_msgs['time_diff'].mean().total_seconds() / 60,
            'max_time_gap': topic_msgs['time_diff'].max().total_seconds() / 60,
            'time_spent_on_questions': topic_msgs[topic_msgs['is_question']]['time_diff'].sum().total_seconds() / 60,
            'time_spent_on_confusion': topic_msgs[topic_msgs['shows_confusion']]['time_diff'].sum().total_seconds() / 60
        }

        return distribution

    def calculate_engagement_metrics(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
        """Calculate student engagement metrics for the topic."""
        topic_msgs = df[df['prompt'].str.contains(topic, case=False)]

        metrics = {
            'message_count': len(topic_msgs),
            'question_ratio': topic_msgs['is_question'].mean(),
            'confusion_ratio': topic_msgs['shows_confusion'].mean(),
            'follow_up_ratio': topic_msgs['is_followup'].mean(),
            'avg_sentiment': topic_msgs['sentiment'].mean(),
            'engagement_score': 0.0  # Will be calculated below
        }

        # Calculate engagement score
        metrics['engagement_score'] = (
            metrics['message_count'] * 0.3 +
            metrics['question_ratio'] * 0.25 +
            metrics['follow_up_ratio'] * 0.25 +
            (metrics['avg_sentiment'] + 1) / 2 * 0.2  # Normalize sentiment to 0-1
        )

        return metrics

    def generate_topic_insights(self, df: pd.DataFrame, topic: str) -> TopicInsights:
        """Generate comprehensive insights for a topic."""
        difficulty = self.analyze_topic_difficulty(df, topic)
        confusion_points = self.identify_confusion_patterns(df, topic)
        question_patterns = self.analyze_question_patterns(df, topic)
        time_distribution = self.analyze_time_distribution(df, topic)
        engagement_metrics = self.calculate_engagement_metrics(df, topic)

        # Generate recommended focus areas based on insights
        focus_areas = []

        if difficulty in (TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT):
            focus_areas.append("Fundamental concept reinforcement needed")

        if confusion_points:
            focus_areas.append(f"Address common confusion around: {', '.join(confusion_points[:3])}")

        if engagement_metrics['confusion_ratio'] > 0.3:
            focus_areas.append("Consider alternative teaching approaches")

        if time_distribution.get('time_spent_on_questions', 0) > time_distribution.get('total_time', 0) * 0.5:
            focus_areas.append("More practical examples or demonstrations needed")

        return TopicInsights(
            difficulty_level=difficulty,
            common_confusion_points=confusion_points,
            question_patterns=question_patterns,
            time_distribution=time_distribution,
            engagement_metrics=engagement_metrics,
            recommended_focus_areas=focus_areas
        )

    def analyze_student_progress(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Analyze individual student progress and learning patterns."""
        student_progress = {}

        for student_id in df['user_id'].unique():
            student_msgs = df[df['user_id'] == student_id]

            # Calculate student-specific metrics
            progress = {
                'total_messages': len(student_msgs),
                'questions_asked': student_msgs['is_question'].sum(),
                'confusion_instances': student_msgs['shows_confusion'].sum(),
                'avg_sentiment': student_msgs['sentiment'].mean(),
                'topic_engagement': {},
                'learning_pattern': self._identify_learning_pattern(student_msgs)
            }

            # Analyze topic-specific engagement
            topics = self.extract_topic_hierarchies(student_msgs)
            for topic in topics:
                topic_msgs = student_msgs[student_msgs['prompt'].str.contains(topic, case=False)]
                progress['topic_engagement'][topic] = {
                    'message_count': len(topic_msgs),
                    'confusion_rate': topic_msgs['shows_confusion'].mean(),
                    'sentiment_trend': stats.linregress(
                        range(len(topic_msgs)),
                        topic_msgs['sentiment']
                    ).slope
                }

            student_progress[student_id] = progress

        return student_progress

    def _identify_learning_pattern(self, student_msgs: pd.DataFrame) -> str:
        """Identify student's learning pattern based on their interaction style."""
        # Calculate key metrics
        question_ratio = student_msgs['is_question'].mean()
        confusion_ratio = student_msgs['shows_confusion'].mean()
        follow_up_ratio = student_msgs['is_followup'].mean()
        sentiment_trend = stats.linregress(
            range(len(student_msgs)),
            student_msgs['sentiment']
        ).slope

        # Identify pattern
        if question_ratio > 0.6:
            return "Inquisitive Learner"
        elif confusion_ratio > 0.4:
            return "Needs Additional Support"
        elif follow_up_ratio > 0.5:
            return "Deep Dive Learner"
        elif sentiment_trend > 0:
            return "Progressive Learner"
        else:
            return "Steady Learner"

    def generate_comprehensive_report(self, chat_history: List[Dict]) -> Dict[str, Any]:
        """Generate a comprehensive analytics report."""
        # Preprocess chat history
        df = self.preprocess_chat_history(chat_history)

        # Extract topics
        topics = self.extract_topic_hierarchies(df)

        report = {
            'topics': {},
            'student_progress': self.analyze_student_progress(df),
            'overall_metrics': {
                'total_conversations': len(df),
                'unique_students': df['user_id'].nunique(),
                'avg_sentiment': df['sentiment'].mean(),
                'most_discussed_topics': Counter(
                    topic for topics_list in topics.values()
                    for topic in topics_list
                ).most_common(5)
            }
        }

        # Generate topic-specific insights
        for main_topic, subtopics in topics.items():
            subtopic_insights = {}
            for subtopic in subtopics:
                subtopic_insights[subtopic] = {
                    'insights': self.generate_topic_insights(df, subtopic),
                    'related_topics': [t for t in subtopics if t != subtopic],
                    'student_engagement': {
                        student_id: self.calculate_engagement_metrics(
                            df[df['user_id'] == student_id],
                            subtopic
                        )
                        for student_id in df['user_id'].unique()
                    }
                }

            report['topics'][main_topic] = {
                'insights': self.generate_topic_insights(df, main_topic),
                'subtopics': subtopic_insights,
                'topic_relationships': {
                    'hierarchy_depth': len(subtopics),
                    'connection_strength': self._calculate_topic_connections(df, main_topic, subtopics),
                    'progression_path': self._identify_topic_progression(df, main_topic, subtopics)
                }
            }

        # Add temporal analysis
        report['temporal_analysis'] = {
            'daily_engagement': df.groupby(df['timestamp'].dt.date).agg({
                'user_id': 'count',
                'is_question': 'sum',
                'shows_confusion': 'sum',
                'sentiment': 'mean'
            }).to_dict(),
            'peak_activity_hours': df.groupby(df['timestamp'].dt.hour)['user_id'].count().nlargest(3).to_dict(),
            'learning_trends': self._analyze_learning_trends(df)
        }

        # Add recommendations
        report['recommendations'] = self._generate_recommendations(report)

        return report

    def _calculate_topic_connections(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> Dict[str, float]:
        """Calculate connection strength between topics based on co-occurrence."""
        connections = {}
        main_topic_msgs = df[df['prompt'].str.contains(main_topic, case=False)]
main_topic_msgs = df[df['prompt'].str.contains(main_topic, case=False)]
|
431 |
+
|
432 |
+
for subtopic in subtopics:
|
433 |
+
cooccurrence = df[
|
434 |
+
df['prompt'].str.contains(main_topic, case=False) &
|
435 |
+
df['prompt'].str.contains(subtopic, case=False)
|
436 |
+
].shape[0]
|
437 |
+
|
438 |
+
connection_strength = cooccurrence / len(main_topic_msgs) if len(main_topic_msgs) > 0 else 0
|
439 |
+
connections[subtopic] = connection_strength
|
440 |
+
|
441 |
+
return connections
|
442 |
+
|
443 |
+
def _identify_topic_progression(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> List[str]:
|
444 |
+
"""Identify optimal topic progression path based on student interactions."""
|
445 |
+
topic_difficulties = {}
|
446 |
+
|
447 |
+
for subtopic in subtopics:
|
448 |
+
difficulty = self.analyze_topic_difficulty(df, subtopic)
|
449 |
+
topic_difficulties[subtopic] = difficulty.value
|
450 |
+
|
451 |
+
# Sort subtopics by difficulty
|
452 |
+
return sorted(subtopics, key=lambda x: topic_difficulties[x])
|
453 |
+
|
454 |
+
def _analyze_learning_trends(self, df: pd.DataFrame) -> Dict[str, Any]:
|
455 |
+
"""Analyze overall learning trends across the dataset."""
|
456 |
+
return {
|
457 |
+
'sentiment_trend': stats.linregress(
|
458 |
+
range(len(df)),
|
459 |
+
df['sentiment']
|
460 |
+
)._asdict(),
|
461 |
+
'confusion_trend': stats.linregress(
|
462 |
+
range(len(df)),
|
463 |
+
df['shows_confusion']
|
464 |
+
)._asdict(),
|
465 |
+
'engagement_progression': self._calculate_engagement_progression(df)
|
466 |
+
}
|
467 |
+
|
468 |
+
def _calculate_engagement_progression(self, df: pd.DataFrame) -> Dict[str, float]:
|
469 |
+
"""Calculate how student engagement changes over time."""
|
470 |
+
df['week'] = df['timestamp'].dt.isocalendar().week
|
471 |
+
weekly_engagement = df.groupby('week').agg({
|
472 |
+
'is_question': 'mean',
|
473 |
+
'shows_confusion': 'mean',
|
474 |
+
'is_followup': 'mean',
|
475 |
+
'sentiment': 'mean'
|
476 |
+
})
|
477 |
+
|
478 |
+
return {
|
479 |
+
'question_trend': stats.linregress(
|
480 |
+
range(len(weekly_engagement)),
|
481 |
+
weekly_engagement['is_question']
|
482 |
+
).slope,
|
483 |
+
'confusion_trend': stats.linregress(
|
484 |
+
range(len(weekly_engagement)),
|
485 |
+
weekly_engagement['shows_confusion']
|
486 |
+
).slope,
|
487 |
+
'follow_up_trend': stats.linregress(
|
488 |
+
range(len(weekly_engagement)),
|
489 |
+
weekly_engagement['is_followup']
|
490 |
+
).slope,
|
491 |
+
'sentiment_trend': stats.linregress(
|
492 |
+
range(len(weekly_engagement)),
|
493 |
+
weekly_engagement['sentiment']
|
494 |
+
).slope
|
495 |
+
}
|
496 |
+
|
497 |
+
def _generate_recommendations(self, report: Dict[str, Any]) -> List[str]:
|
498 |
+
"""Generate actionable recommendations based on the analysis."""
|
499 |
+
recommendations = []
|
500 |
+
|
501 |
+
# Analyze difficulty distribution
|
502 |
+
difficult_topics = [
|
503 |
+
topic for topic, data in report['topics'].items()
|
504 |
+
if data['insights'].difficulty_level in
|
505 |
+
(TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT)
|
506 |
+
]
|
507 |
+
|
508 |
+
if difficult_topics:
|
509 |
+
recommendations.append(
|
510 |
+
f"Consider providing additional resources for challenging topics: {', '.join(difficult_topics)}"
|
511 |
+
)
|
512 |
+
|
513 |
+
# Analyze student engagement
|
514 |
+
avg_engagement = np.mean([
|
515 |
+
progress['questions_asked'] / progress['total_messages']
|
516 |
+
for progress in report['student_progress'].values()
|
517 |
+
])
|
518 |
+
|
519 |
+
if avg_engagement < 0.3:
|
520 |
+
recommendations.append(
|
521 |
+
"Implement more interactive elements to increase student engagement"
|
522 |
+
)
|
523 |
+
|
524 |
+
# Analyze temporal patterns
|
525 |
+
peak_hours = list(report['temporal_analysis']['peak_activity_hours'].keys())
|
526 |
+
recommendations.append(
|
527 |
+
f"Consider scheduling additional support during peak activity hours: {peak_hours}"
|
528 |
+
)
|
529 |
+
|
530 |
+
# Analyze learning trends
|
531 |
+
# sentiment_trend = report['temporal_analysis']['learning_trends']['sentiment_trend']
|
532 |
+
# if sentiment_trend < 0:
|
533 |
+
# recommendations.append(
|
534 |
+
# "Review teaching approach to address declining student satisfaction"
|
535 |
+
# )
|
536 |
+
# Analyze learning trends
|
537 |
+
# Analyze learning trends
|
538 |
+
sentiment_trend = report.get('temporal_analysis', {}).get('learning_trends', {}).get('sentiment_trend', None)
|
539 |
+
if isinstance(sentiment_trend, (int, float)):
|
540 |
+
if sentiment_trend < 0:
|
541 |
+
recommendations.append(
|
542 |
+
"Review teaching approach to address declining student satisfaction"
|
543 |
+
)
|
544 |
+
elif isinstance(sentiment_trend, dict):
|
545 |
+
# Handle the case where sentiment_trend is a dictionary
|
546 |
+
print(f"Unexpected dict format for sentiment_trend: {sentiment_trend}")
|
547 |
+
else:
|
548 |
+
print(f"Unexpected type for sentiment_trend: {type(sentiment_trend)}")
|
549 |
+
|
550 |
+
return recommendations
|
551 |
+
|
552 |
+
class CustomJSONEncoder(json.JSONEncoder):
|
553 |
+
def default(self, obj):
|
554 |
+
if isinstance(obj, TopicDifficulty):
|
555 |
+
return obj.value
|
556 |
+
if isinstance(obj, TopicInsights):
|
557 |
+
return obj.to_dict()
|
558 |
+
if isinstance(obj, np.integer):
|
559 |
+
return int(obj)
|
560 |
+
if isinstance(obj, np.floating):
|
561 |
+
return float(obj)
|
562 |
+
if isinstance(obj, np.ndarray):
|
563 |
+
return obj.tolist()
|
564 |
+
if isinstance(obj, datetime):
|
565 |
+
return obj.isoformat()
|
566 |
+
return super().default(obj)
|
567 |
+
|
568 |
+
def convert_insights_to_dict(report):
|
569 |
+
for main_topic, data in report['topics'].items():
|
570 |
+
if isinstance(data['insights'], TopicInsights):
|
571 |
+
data['insights'] = data['insights'].to_dict()
|
572 |
+
for subtopic, subdata in data['subtopics'].items():
|
573 |
+
if isinstance(subdata['insights'], TopicInsights):
|
574 |
+
subdata['insights'] = subdata['insights'].to_dict()
|
575 |
+
|
576 |
+
if __name__ == "__main__":
|
577 |
+
# Load chat history data
|
578 |
+
chat_history = None
|
579 |
+
with open('sample_files/chat_history_corpus.json', 'r', encoding="utf-8") as file:
|
580 |
+
chat_history = json.load(file)
|
581 |
+
|
582 |
+
# Initialize analytics system
|
583 |
+
analytics = PreClassAnalytics()
|
584 |
+
|
585 |
+
# Generate comprehensive report
|
586 |
+
report = analytics.generate_comprehensive_report(chat_history)
|
587 |
+
|
588 |
+
# Convert insights to dictionary
|
589 |
+
# convert_insights_to_dict(report)
|
590 |
+
|
591 |
+
print(json.dumps(report, indent=4, cls=CustomJSONEncoder))
|
592 |
+
# print(report)
|
requirements.txt
ADDED
@@ -0,0 +1,37 @@
streamlit
pymongo
PyPDF2
python-docx
openai
google-generativeai
llama-index
werkzeug
numpy
pandas
plotly
scikit-learn
networkx
community
umap-learn
seaborn
matplotlib
scipy
Pillow
python-dotenv
zoomus
asyncio
google-auth-oauthlib
google-auth
transformers
textstat
spacy
streamlit_option_menu
beautifulsoup4
youtube-transcript-api
requests
xml==0.0.1
networkx==3.1
bokeh==3.2.1

scikit-learn==1.2.2
langchain==0.0.208
research22.py
ADDED
@@ -0,0 +1,517 @@
# if __name__ == "__main__":
#     main()
import streamlit as st
import google.generativeai as genai
from typing import Dict, Any
import PyPDF2
import io
from pymongo import MongoClient
from dotenv import load_dotenv
import os
import json
import re

# --------------------------------------------------------------------------------
# 1. Environment Setup
# --------------------------------------------------------------------------------
load_dotenv()
# MongoDB
MONGODB_URI = os.getenv(
    "MONGODB_UR",
    "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
)
# Gemini
GEMINI_KEY = os.getenv("GEMINI_KEY", "AIzaSyCFIvntck54HOCS5pxxiy9wpr5HJN3r02I")

# Configure Gemini
genai.configure(api_key=GEMINI_KEY)


# --------------------------------------------------------------------------------
# 2. Database Connection
# --------------------------------------------------------------------------------
def create_db_connection():
    """
    Create MongoDB connection and return the 'papers' collection.
    """
    try:
        client = MongoClient(MONGODB_URI)
        db = client["novascholar_db"]  # Database name
        collection = db["research_papers"]  # Collection name
        # Ping to confirm connection
        client.admin.command("ping")
        return db
    except Exception as e:
        st.error(f"Database connection error: {str(e)}")
        return None


# --------------------------------------------------------------------------------
# 3. PDF Text Extraction
# --------------------------------------------------------------------------------
def extract_text_from_pdf(pdf_file) -> str:
    """
    Extract all text from a PDF.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        text = ""
        for page in pdf_reader.pages:
            text += page.extract_text() + "\n"
        return text
    except Exception as e:
        st.error(f"Error processing PDF: {str(e)}")
        return ""


# --------------------------------------------------------------------------------
# 4. Gemini Response Helper
# --------------------------------------------------------------------------------
def get_gemini_response(prompt: str) -> str:
    """
    Sends a prompt to Google's Gemini model and returns the response text.
    Adjust this function as needed for your generative AI usage.
    """
    try:
        model = genai.GenerativeModel("gemini-pro")
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        st.error(f"Gemini API Error: {str(e)}")
        return ""


# --------------------------------------------------------------------------------
# 5. Basic Info Extraction
# --------------------------------------------------------------------------------
def extract_basic_info(text: str) -> Dict[str, str]:
    """
    Extract title, publication, journal/conference, abstract, keywords, author, and date from the paper text.
    Return a dictionary with these fields.
    """
    prompt = f"""
    Extract the following fields from the research paper text below:

    Title
    Publication
    Journal_Conference
    Abstract
    Keywords
    Author
    Date_of_Publication

    Paper text:
    {text}

    Return them in this format:
    Title: ...
    Publication: ...
    Journal_Conference: ...
    Abstract: ...
    Keywords: ...
    Author: ...
    Date_of_Publication: ...
    """
    response = get_gemini_response(prompt)
    if not response:
        return {}
    info = {}
    lines = response.split("\n")
    for line in lines:
        if ":" in line:
            key, value = line.split(":", 1)
            info[key.strip()] = value.strip()
    return info


# --------------------------------------------------------------------------------
# 6. Content Sections Extraction
# --------------------------------------------------------------------------------
def extract_content_sections(text: str) -> Dict[str, str]:
    """
    Extract expanded sections: Intro, Literature_Review, Research_Models_Used,
    Methodology, Discussion, Future_Scope, Theory.
    """
    prompt = f"""Please extract these sections from the research paper:
    1. Introduction
    2. Literature Review
    3. Research Models Used
    4. Methodology
    5. Discussion
    6. Future Scope
    7. Theory

    Paper text: {text}

    Return in this exact format without any additional text or explanations also make sure
    no data should be empty (at least 10-15 words) and it should be meaningful:
    Intro: <text>
    Literature_Review: <text>
    Research_Models_Used: <text>
    Methodology: <text>
    Discussion: <text>
    Future_Scope: <text>
    Theory: <text>
    """
    response = get_gemini_response(prompt)
    if not response:
        return {}
    sections = {}
    lines = response.split("\n")
    for line in lines:
        if ":" in line:
            key, value = line.split(":", 1)
            sections[key.strip()] = value.strip()
    return sections


# --------------------------------------------------------------------------------
# 7. Variables Extraction
# --------------------------------------------------------------------------------
def extract_variables(text: str) -> Dict[str, Any]:
    """
    Extract variable data: Independent_Variables, nof_Independent_Variables,
    Dependent_Variables, nof_Dependent_Variables, Control_Variables,
    Extraneous_Variables, nof_Control_Variables, nof_Extraneous_Variables
    """
    prompt = f"""From the paper text, extract the following fields:
    1. Independent_Variables
    2. nof_Independent_Variables
    3. Dependent_Variables
    4. nof_Dependent_Variables
    5. Control_Variables
    6. Extraneous_Variables
    7. nof_Control_Variables
    8. nof_Extraneous_Variables

    Return them in this format:
    Independent_Variables: <list>
    nof_Independent_Variables: <integer>
    Dependent_Variables: <list>
    nof_Dependent_Variables: <integer>
    Control_Variables: <list>
    Extraneous_Variables: <list>
    nof_Control_Variables: <integer>
    nof_Extraneous_Variables: <integer>

    Paper text: {text}
    """
    response = get_gemini_response(prompt)
    if not response:
        return {}
    variables = {}
    lines = response.split("\n")
    for line in lines:
        if ":" in line:
            key, value = line.split(":", 1)
            # Attempt to convert to integer where appropriate
            clean_key = key.strip()
            clean_value = value.strip()
            if clean_key.startswith("nof_"):
                try:
                    variables[clean_key] = int(clean_value)
                except ValueError:
                    # fallback if it's not an integer
                    variables[clean_key] = 0
            else:
                variables[clean_key] = clean_value
    return variables


# --------------------------------------------------------------------------------
# 8. Utility to ensure no empty fields (example logic)
# --------------------------------------------------------------------------------
def ensure_non_empty_values(data: Dict[str, Any], fallback_text: str) -> Dict[str, Any]:
    """
    Ensure each extracted field has meaningful content. If empty, fill with default text.
    """
    for k, v in data.items():
        if not v or len(str(v).split()) < 3:  # example check for minimal words
            data[k] = f"No sufficient data found for {k}. Could not parse."
    return data


# --------------------------------------------------------------------------------
# 9. Processing the Paper
# --------------------------------------------------------------------------------
# def process_paper(text: str) -> Dict[str, Any]:
#     """
#     Orchestrate calls to extract basic info, content sections, and variables.
#     Return a dictionary containing all the fields with consistent naming.
#     """
#     with st.spinner("Extracting basic information..."):
#         basic_info = extract_basic_info(text)
#         basic_info = ensure_non_empty_values(basic_info, text)
#
#     with st.spinner("Extracting content sections..."):
#         content_sections = extract_content_sections(text)
#         content_sections = ensure_non_empty_values(content_sections, text)
#
#     with st.spinner("Extracting variables..."):
#         variables_info = extract_variables(text)
#         variables_info = ensure_non_empty_values(variables_info, text)
#
#     # Create a single dictionary with all fields
#     paper_doc = {
#         "Title": basic_info.get("Title", ""),
#         "Publication": basic_info.get("Publication", ""),
#         "Journal_Conference": basic_info.get("Journal_Conference", ""),
#         "Abstract": basic_info.get("Abstract", ""),
#         "Keywords": basic_info.get("Keywords", ""),
#         "Author": basic_info.get("Author", ""),
#         "Date_of_Publication": basic_info.get("Date_of_Publication", ""),
#         "Intro": content_sections.get("Intro", ""),
#         "Literature_Review": content_sections.get("Literature_Review", ""),
#         "Research_Models_Used": content_sections.get("Research_Models_Used", ""),
#         "Methodology": content_sections.get("Methodology", ""),
#         "Discussion": content_sections.get("Discussion", ""),
#         "Future_Scope": content_sections.get("Future_Scope", ""),
#         "Theory": content_sections.get("Theory", ""),
#         "Independent_Variables": variables_info.get("Independent_Variables", ""),
#         "nof_Independent_Variables": variables_info.get("nof_Independent_Variables", 0),
#         "Dependent_Variables": variables_info.get("Dependent_Variables", ""),
#         "nof_Dependent_Variables": variables_info.get("nof_Dependent_Variables", 0),
#         "Control_Variables": variables_info.get("Control_Variables", ""),
#         "Extraneous_Variables": variables_info.get("Extraneous_Variables", ""),
#         "nof_Control_Variables": variables_info.get("nof_Control_Variables", 0),
#         "nof_Extraneous_Variables": variables_info.get("nof_Extraneous_Variables", 0),
#     }
#
#     return paper_doc

# filepath: /c:/Users/acer/OneDrive/Documents/GitHub/res-cor/research22.py
# ...existing code continues...

# --------------------------------------------------------------------------------
# 3. Paper Type Attributes
# --------------------------------------------------------------------------------
PAPER_TYPE_ATTRIBUTES = {
    "Review Based Paper": [
        "Title",
        "Publication",
        "Journal_Conference",
        "Abstract",
        "Keywords",
        "Author",
        "Date_of_Publication",
        "Intro",
        "Literature_Review",
        "Body",
        "Protocol",
        "Search String",
        "Included Studies",
        "Data Collection and Analysis Methods",
        "Data Extraction Table",
        "Synthesis and Analysis",
        "Conclusion",
        "Limitations",
        "Results",
        "References",
        "Risk of Bias Assessment",
    ],
    "Opinion/Perspective Based Paper": [
        "Title",
        "Publication",
        "Journal_Conference",
        "Abstract",
        "Keywords",
        "Author",
        "Date_of_Publication",
        "Intro",
        "Literature_Review",
        "Introduction",
        "Body",
        "Results and Discussion",
        "Conclusion",
        "References",
    ],
    "Empirical Research Paper": [
        "Title",
        "Publication",
        "Journal_Conference",
        "Abstract",
        "Keywords",
        "Author",
        "Date_of_Publication",
        "Intro",
        "Literature_Review",
        "Introduction",
        "Body",
        "Methodology",
        "Participants",
        "Survey Instrument",
        "Data Collection",
        "Data Analysis",
        "Results and Discussion",
        "Conclusion",
        "References",
    ],
    "Research Paper (Other)": [
        "Title",
        "Publication",
        "Journal_Conference",
        "Abstract",
        "Keywords",
        "Author",
        "Date_of_Publication",
        "Intro",
        "Literature_Review",
        "Research_Models_Used",
        "Methodology",
        "Discussion",
        "Future_Scope",
        "Theory",
        "Independent_Variables",
        "nof_Independent_Variables",
        "Dependent_Variables",
        "nof_Dependent_Variables",
        "Control_Variables",
        "Extraneous_Variables",
        "nof_Control_Variables",
        "nof_Extraneous_Variables",
    ],
}


# --------------------------------------------------------------------------------
# 4. Extract Paper Fields
# --------------------------------------------------------------------------------
def extract_paper_fields(text: str, paper_type: str) -> Dict[str, Any]:
    """
    Use Gemini to extract fields based on the paper type attributes,
    then return a dictionary of extracted fields.
    """
    if paper_type not in PAPER_TYPE_ATTRIBUTES:
        st.error("Invalid paper type selected.")
        return {}

    selected_attrs = PAPER_TYPE_ATTRIBUTES[paper_type]
    prompt = f"""
    Extract the following fields from the research paper text below:

    {", ".join(selected_attrs)}

    Paper text:
    {text}

    Return them in this JSON format strictly, with no extra text:
    [
        {{
            {", ".join([f'"{attr}": "value"' for attr in selected_attrs])}
        }}
    ]
    """

    try:
        response = get_gemini_response(prompt)
        if not response:
            st.error("No response from Gemini.")
            return {}

        # Clean up any text around JSON
        raw_text = response.strip()

        # Find start and end of JSON
        json_start = raw_text.find("[")
        json_end = raw_text.rfind("]") + 1
        json_str = raw_text[json_start:json_end]

        # Try removing trailing commas, extra quotes, etc.
        json_str = re.sub(r",\s*}", "}", json_str)
        json_str = re.sub(r",\s*\]", "]", json_str)

        try:
            data = json.loads(json_str)
        except json.JSONDecodeError as e:
            st.warning(f"Fixing JSON errors: {str(e)}")
            # As a last-resort attempt, remove anything after the last curly bracket
            bracket_pos = json_str.rfind("}")
            if bracket_pos != -1:
                json_str = json_str[: bracket_pos + 1]
            # Try again
            data = json.loads(json_str)

        if isinstance(data, list) and len(data) > 0:
            return data[0]
        else:
            st.error("Gemini did not return a valid JSON array.")
            return {}
    except Exception as e:
        st.error(f"Error in Gemini extraction: {str(e)}")
        return {}


# --------------------------------------------------------------------------------
# 5. Process Paper and Save
# --------------------------------------------------------------------------------
def process_paper(text: str, paper_type: str):
    """
    Extract paper fields based on paper type, then save to
    the corresponding MongoDB collection.
    """
    db = create_db_connection()
    if not db:
        return

    # Determine collection name
    collection_name = paper_type.replace(" ", "_").lower()
    collection = db[collection_name]

    # Extract fields
    extracted_data = extract_paper_fields(text, paper_type)
    if extracted_data:
        # Insert into MongoDB
        collection.insert_one(extracted_data)
        return extracted_data
    return {}


# --------------------------------------------------------------------------------
# 6. Streamlit UI for Paper Extraction
# --------------------------------------------------------------------------------
def main():
    # st.set_page_config(page_title="Extract Research Paper", layout="wide")
    st.title("Extract Research Paper")

    paper_type = st.selectbox(
        "Select type of research paper:",
        [
            "Review Based Paper",
            "Opinion/Perspective Based Paper",
            "Empirical Research Paper",
            "Research Paper (Other)",
        ],
    )

    uploaded_file = st.file_uploader("Upload a PDF or text file", type=["pdf", "txt"])

    if st.button("Extract & Save") and uploaded_file:
        try:
            # Read file content
            if uploaded_file.type == "application/pdf":
                pdf_reader = PyPDF2.PdfReader(uploaded_file)
                text_content = ""
                for page in pdf_reader.pages:
                    text_content += page.extract_text()
            else:
                text_content = uploaded_file.read().decode("utf-8", errors="replace")

            with st.spinner("Extracting fields..."):
                data = process_paper(text_content, paper_type)

            if data:
                st.success(
                    f"Paper extracted and saved to MongoDB in '{paper_type}' collection!"
                )
                st.write("Extracted fields:")
                st.json(data)

        except Exception as e:
            st.error(f"An error occurred: {str(e)}")


# ...existing code (if any)...

if __name__ == "__main__":
    main()
research3.py
ADDED
@@ -0,0 +1,110 @@
import streamlit as st
import pandas as pd
import requests
import json
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"


def call_perplexity_api(prompt: str) -> str:
    """Call Perplexity AI with a prompt, return the text response if successful."""
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }

    try:
        response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        st.error(f"API Error: {str(e)}")
        return ""


def generate_research_paper(df: pd.DataFrame, topic: str) -> dict:
    """
    For each column in the DataFrame, generate a research paper section (200-500 words)
    that addresses the data in that column on the given topic. Return a dict: column -> text.
    """
    paper_sections = {}
    for col in df.columns:
        # Convert all non-null rows in the column to strings and join them for context
        col_values = df[col].dropna().astype(str).tolist()
        # We'll truncate if there's a ton of text
        sample_text = " | ".join(col_values[:50])  # limit to first 50 rows for brevity

        prompt = f"""
        Topic: {topic}
        Column: {col}
        Data Samples: {sample_text}

        Generate a well-structured research paper section that addresses the topic above,
        referencing relevant information from the column data.
        The section should be at least 100 words and at most 150 words.
        Provide insights, examples, and possible research directions integrating the corpus data.
        """
        section_text = call_perplexity_api(prompt)
        paper_sections[col] = section_text.strip() if section_text else ""
    return paper_sections


def format_paper(paper_dict: dict, topic: str) -> str:
    """
    Format the generated paper into a Markdown string.
    Add the topic as the main title, each column name as a heading, and
    the corresponding text as paragraph content.
    """
    md_text = f"# Research Paper on: {topic}\n\n"
    for col, content in paper_dict.items():
        md_text += f"## {col}\n{content}\n\n"
    return md_text


def main():
    st.title("Topic + Corpus-Based Research Paper Generator")

    topic_input = st.text_input("Enter the topic for the research paper:")
    uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")

    if uploaded_file:
        df = pd.read_csv(uploaded_file)
        st.write("### Preview of Uploaded Data")
        st.dataframe(df.head())

        if st.button("Generate Research Paper"):
            if topic_input.strip():
                st.info("Generating paper based on the topic and the corpus columns...")
                with st.spinner("Calling Perplexity AI..."):
                    paper = generate_research_paper(df, topic_input)
                if paper:
                    formatted_paper = format_paper(paper, topic_input)
                    st.success("Research Paper Generated Successfully!")
                    st.write(formatted_paper)

                    st.download_button(
                        label="Download Paper as Markdown",
                        data=formatted_paper,
                        file_name="research_paper.md",
                        mime="text/markdown",
                    )
                else:
                    st.error(
                        "Paper generation failed. Please check Perplexity API key."
                    )
            else:
                st.warning("Please enter a valid topic.")


if __name__ == "__main__":
    main()
research_assistant_dashboard.py
ADDED
@@ -0,0 +1,349 @@
# import streamlit as st
# from openai import OpenAI
# import os
# from dotenv import load_dotenv
# from llama_index.core import (
#     VectorStoreIndex,
#     SimpleDirectoryReader,
#     Document,
#     GPTVectorStoreIndex,
# )
# from bson import ObjectId
# import requests
# import openai
# import numpy as np
# from pymongo import MongoClient
# from bson import ObjectId
# from datetime import datetime
# from llama_index.embeddings.openai import OpenAIEmbedding
# from typing import List, Dict

# # Initialize Perplexity API and OpenAI API
# load_dotenv()
# perplexity_api_key = os.getenv("PERPLEXITY_KEY")
# openai.api_key = os.getenv("OPENAI_KEY")

# # MongoDB setup
# MONGO_URI = os.getenv("MONGO_URI")
# client = MongoClient(MONGO_URI)
# db = client["novascholar_db"]
# research_papers_collection = db["research_papers"]


# def fetch_perplexity_data(api_key, topic):
#     """
#     Fetch research papers data from Perplexity API with proper formatting
#     """
#     headers = {
#         "accept": "application/json",
#         "content-type": "application/json",
#         "authorization": f"Bearer {api_key}",
#     }

#     # Structured prompt to get properly formatted response
#     messages = [
#         {
#             "role": "system",
#             "content": """You are a research paper retrieval expert. For the given topic, return exactly 10 research papers in the following format:
#             Title: Paper Title
#             Authors: Author 1, Author 2
#             Year: YYYY
#             Content: Detailed paper content with abstract and key findings
#             URL: DOI or paper URL
#             """,
#         },
#         {"role": "user", "content": f"Find 10 research papers about: {topic}"},
#     ]

#     try:
#         client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
#         response = client.chat.completions.create(
#             model="llama-3.1-sonar-small-128k-chat",  # Use the best Perplexity model
#             messages=messages,
#         )

#         # Extract and validate response
#         content = response.choices[0].message.content
#         st.write("Fetched Data:", content)  # Debugging line to check the fetched data

#         return content

#     except Exception as e:
#         st.error(f"Failed to fetch data from Perplexity API: {str(e)}")
#         return ""


# def split_and_vectorize_papers(content: str) -> List[Dict]:
#     """Split and vectorize papers using OpenAI embeddings"""
#     papers = content.split("\n\n")

#     # Initialize OpenAI client
#     # client = OpenAI()  # Uses api_key from environment variable
#     vectors = []

#     for paper in papers:
#         try:
#             # Get embedding using OpenAI's API directly
#             response = openai.embeddings.create(
#                 model="text-embedding-ada-002", input=paper, encoding_format="float"
#             )

#             # Extract embedding from response
#             embedding = response.data[0].embedding

#             vectors.append(
#                 {"content": paper, "vector": embedding, "timestamp": datetime.utcnow()}
#             )

#         except Exception as e:
#             st.error(f"Error vectorizing paper: {str(e)}")
#             continue

#     return vectors


# def store_papers_in_mongodb(papers):
#     """Store papers with vectors in MongoDB"""
#     try:
#         for paper in papers:
#             # Prepare MongoDB document
#             mongo_doc = {
#                 "content": paper["content"],
#                 "vector": paper["vector"],
#                 "created_at": datetime.utcnow(),
#             }

#             # Insert into MongoDB
#             db.papers.update_one(
#                 {"content": paper["content"]}, {"$set": mongo_doc}, upsert=True
#             )

#         st.success(f"Stored {len(papers)} papers in database")
#         return True
#     except Exception as e:
#         st.error(f"Error storing papers: {str(e)}")


# def get_research_papers(query):
#     """
#     Get and store research papers with improved error handling
#     """
#     # Fetch papers from Perplexity
#     content = fetch_perplexity_data(perplexity_api_key, query)

#     if not content:
#         return []

#     # Split and vectorize papers
#     papers = split_and_vectorize_papers(content)

#     # Store papers in MongoDB
#     if store_papers_in_mongodb(papers):
#         return papers
#     else:
#         st.warning("Failed to store papers in database, but returning fetched results")
#         return papers


# def analyze_research_gaps(papers):
#     """
#     Analyze research gaps with improved prompt and error handling
#     """
#     if not papers:
#         return "No papers provided for analysis"

#     # Prepare paper summaries for analysis
#     paper_summaries = "\n\n".join(
#         [
#             f"Key Findings: {paper['content'][:500]}..."
#             # f"Title: {paper['title']}\nYear: {paper['year']}\nKey Findings: {paper['content'][:500]}..."
#             for paper in papers
#         ]
#     )

#     headers = {
#         "Authorization": f"Bearer {perplexity_api_key}",
#         "Content-Type": "application/json",
#     }

#     data = {
#         "messages": [
#             {
#                 "role": "system",
#                 "content": "You are a research analysis expert. Identify specific research gaps and future research directions based on the provided papers. Format your response with clear sections: Current State, Identified Gaps, and Future Directions.",
#             },
#             {
#                 "role": "user",
#                 "content": f"Analyze these papers and identify research gaps:\n\n{paper_summaries}",
#             },
#         ]
#     }

#     try:
#         client = OpenAI(
#             api_key=perplexity_api_key, base_url="https://api.perplexity.ai"
#         )
#         response = client.chat.completions.create(
#             model="llama-3.1-sonar-small-128k-chat",  # Use the best Perplexity model
#             messages=data["messages"],
#         )
#         return response.choices[0].message.content

#     except Exception as e:
#         st.error(f"Failed to analyze research gaps: {str(e)}")
#         return "Error analyzing research gaps"


# def create_research_paper(gaps, topic, papers):
#     """
#     Create a research paper that addresses the identified gaps using Perplexity API
#     """
#     full_texts = "\n\n".join([paper["content"] for paper in papers])
#     headers = {
#         "Authorization": f"Bearer {perplexity_api_key}",
#         "Content-Type": "application/json",
#     }
#     data = {
#         "messages": [
#             {
#                 "role": "system",
#                 "content": "You are a research paper generation expert. Create a comprehensive research paper that addresses the identified gaps based on the provided papers. Format your response with clear sections: Introduction, Literature Review, Methodology, Results, Discussion, Conclusion, and References.",
#             },
#             {
#                 "role": "user",
#                 "content": f"Create a research paper on the topic '{topic}' that addresses the following research gaps:\n\n{gaps}\n\nBased on the following papers:\n\n{full_texts}",
#             },
#         ]
#     }
#     try:
#         client = OpenAI(
#             api_key=perplexity_api_key, base_url="https://api.perplexity.ai"
#         )
#         response = client.chat.completions.create(
#             model="llama-3.1-sonar-small-128k-chat",  # Use the best Perplexity model
#             messages=data["messages"],
#         )
#         return response.choices[0].message.content

#     except Exception as e:
#         st.error(f"Failed to create research paper: {str(e)}")
#         return "Error creating research paper"


# def cosine_similarity(vec1, vec2):
#     """Calculate the cosine similarity between two vectors"""
#     vec1 = np.array(vec1)
#     vec2 = np.array(vec2)
#     return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))


# def calculate_cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
#     """Calculate cosine similarity between two vectors"""
#     return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))


# def display_research_assistant_dashboard():
#     """Display research assistant dashboard"""
#     # Initialize session state for recommendations
#     if "recommendations" not in st.session_state:
#         st.session_state.recommendations = None
#     if "vectors" not in st.session_state:
#         st.session_state.vectors = None
#     if "generated_paper" not in st.session_state:
#         st.session_state.generated_paper = None

#     # Sidebar
#     with st.sidebar:
#         st.title(f"Welcome, {st.session_state.username}")
#         if st.button("Logout", use_container_width=True):
#             for key in st.session_state.keys():
#                 del st.session_state[key]
#             st.rerun()

#     # Main content
#     st.title("Research Paper Recommendations")
#     search_query = st.text_input("Enter research topic:")
#     col1, col2 = st.columns(2)
#     with col1:
#         if st.button("Get Research Papers"):
#             if search_query:
#                 with st.spinner("Fetching recommendations..."):
#                     st.session_state.recommendations = get_research_papers(search_query)
#                     st.session_state.vectors = [
#                         paper["vector"] for paper in st.session_state.recommendations
#                     ]
#                     st.markdown(
#                         "\n\n".join(
#                             [
#                                 f"**{i+1}.**\n{paper['content']}"
#                                 # f"**{i+1}. {paper['title']}**\n{paper['content']}"
#                                 for i, paper in enumerate(
#                                     st.session_state.recommendations
#                                 )
#                             ]
#                         )
#                     )
#             else:
#                 st.warning("Please enter a search query")
#     with col2:
#         if st.button("Analyze Research Gaps"):
#             if st.session_state.recommendations:
#                 with st.spinner("Analyzing research gaps..."):
#                     gaps = analyze_research_gaps(st.session_state.recommendations)
#                     st.session_state.generated_paper = create_research_paper(
#                         gaps, search_query, st.session_state.recommendations
#                     )
#                     st.markdown("### Potential Research Gaps")
#                     st.markdown(gaps)
#             else:
#                 st.warning("Please get research papers first")

#     if st.button("Save and Vectorize"):
#         if st.session_state.generated_paper:
#             try:
#                 # Initialize OpenAI client

#                 # Get embedding for generated paper
#                 response = openai.embeddings.create(
#                     model="text-embedding-ada-002",
#                     input=st.session_state.generated_paper,
#                     encoding_format="float",
#                 )
#                 generated_vector = response.data[0].embedding

#                 # Calculate similarities with stored vectors
#                 similarities = [
#                     calculate_cosine_similarity(generated_vector, paper_vector)
#                     for paper_vector in st.session_state.vectors
#                 ]

#                 # Display results
#                 st.markdown("### Generated Research Paper")
#                 st.markdown(st.session_state.generated_paper)

#                 st.markdown("### Cosine Similarities with Original Papers")
#                 for i, similarity in enumerate(similarities):
#                     st.metric(
#                         f"Paper {i+1}",
#                         value=f"{similarity:.3f}",
#                         help="Cosine similarity (1.0 = identical, 0.0 = completely different)",
#                     )

#             except Exception as e:
#                 st.error(f"Error during vectorization: {str(e)}")
#         else:
#             st.warning("Please analyze research gaps first")


# # Run the dashboard
# if __name__ == "__main__":
#     display_research_assistant_dashboard()

import research_combine2
# if __name__ == "__main__":
#     display_research_assistant_dashboard()
def display_research_assistant_dashboard():
    research_combine2.display_research_assistant_dashboard()
research_combine.py
ADDED
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
import json
|
4 |
+
import requests
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
from pymongo import MongoClient
|
7 |
+
from typing import Dict, Any
|
8 |
+
|
9 |
+
# Load environment variables
|
10 |
+
load_dotenv()
|
11 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
12 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
13 |
+
MONGODB_URI = os.getenv(
|
14 |
+
"MONGODB_UR",
|
15 |
+
"mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
|
16 |
+
)
|
17 |
+
|
18 |
+
# MongoDB setup
|
19 |
+
client = MongoClient(MONGODB_URI)
|
20 |
+
db = client["novascholar_db"]
|
21 |
+
collection = db["research_papers"]
|
22 |
+
|
23 |
+
|
24 |
+
def search_papers(topic: str, num_papers: int) -> str:
|
25 |
+
headers = {
|
26 |
+
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
27 |
+
"Content-Type": "application/json",
|
28 |
+
}
|
29 |
+
|
30 |
+
prompt = f"""Find {num_papers} recent research papers about {topic}.
|
31 |
+
Return ONLY a valid JSON array with the following structure for each paper, no additional text:
|
32 |
+
[
|
33 |
+
{{
|
34 |
+
"Title": "paper title",
|
35 |
+
"Publication": "publication name",
|
36 |
+
"Journal_Conference": "venue name",
|
37 |
+
"Abstract": "abstract text",
|
38 |
+
"Keywords": "key terms",
|
39 |
+
"Author": "author names",
|
40 |
+
"Date_of_Publication": "publication date",
|
41 |
+
"Intro": "introduction summary",
|
42 |
+
"Literature_Review": "literature review summary",
|
43 |
+
"Research_Models_Used": "models description",
|
44 |
+
"Methodology": "methodology description",
|
45 |
+
"Discussion": "discussion summary",
|
46 |
+
"Future_Scope": "future work",
|
47 |
+
"Theory": "theoretical framework",
|
48 |
+
"Independent_Variables": "list of variables",
|
49 |
+
"nof_Independent_Variables": 0,
|
50 |
+
"Dependent_Variables": "list of variables",
|
51 |
+
"nof_Dependent_Variables": 0,
|
52 |
+
"Control_Variables": "list of variables",
|
53 |
+
"nof_Control_Variables": 0,
|
54 |
+
"Extraneous_Variables": "list of variables",
|
55 |
+
"nof_Extraneous_Variables": 0
|
56 |
+
}}
|
57 |
+
]"""
|
58 |
+
|
59 |
+
payload = {
|
60 |
+
"model": "llama-3.1-sonar-small-128k-chat",
|
61 |
+
"messages": [
|
62 |
+
{
|
63 |
+
"role": "system",
|
64 |
+
"content": "You are a research paper analyzer that returns only valid JSON arrays.",
|
65 |
+
},
|
66 |
+
{"role": "user", "content": prompt},
|
67 |
+
],
|
68 |
+
"temperature": 0.1,
|
69 |
+
}
|
70 |
+
|
71 |
+
try:
|
72 |
+
response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
|
73 |
+
response.raise_for_status()
|
74 |
+
content = response.json()["choices"][0]["message"]["content"]
|
75 |
+
|
76 |
+
# Clean response and ensure it's valid JSON
|
77 |
+
content = content.strip()
|
78 |
+
if not content.startswith("["):
|
79 |
+
content = content[content.find("[") :]
|
80 |
+
if not content.endswith("]"):
|
81 |
+
content = content[: content.rfind("]") + 1]
|
82 |
+
|
83 |
+
# Validate JSON
|
84 |
+
papers = json.loads(content)
|
85 |
+
if not isinstance(papers, list):
|
86 |
+
raise ValueError("Response is not a JSON array")
|
87 |
+
|
88 |
+
# Insert into MongoDB
|
89 |
+
if papers:
|
90 |
+
collection.insert_many(papers)
|
91 |
+
return content
|
92 |
+
return "[]"
|
93 |
+
|
94 |
+
except json.JSONDecodeError as e:
|
95 |
+
st.error(f"Invalid JSON response: {str(e)}")
|
96 |
+
return None
|
97 |
+
except Exception as e:
|
98 |
+
st.error(f"Error: {str(e)}")
|
99 |
+
return None
|
100 |
+
|
101 |
+
|
102 |
+
import research22
|
103 |
+
import keywords_database_download
|
104 |
+
import new_keywords
|
105 |
+
import infranew
|
106 |
+
import loldude
|
107 |
+
import new_research_paper
|
108 |
+
import research3
|
109 |
+
import entire_download
|
110 |
+
|
111 |
+
|
112 |
+
def main():
|
113 |
+
st.set_page_config(page_title="Research Papers", layout="wide")
|
114 |
+
|
115 |
+
st.title("Research Papers")
|
116 |
+
|
117 |
+
# Sidebar radio
|
118 |
+
option = st.sidebar.radio(
|
119 |
+
"Select an option",
|
120 |
+
[
|
121 |
+
"Search Papers",
|
122 |
+
"Upload Paper",
|
123 |
+
"Single Keyword Search",
|
124 |
+
"Multiple Keywords Search",
|
125 |
+
"Knowledge Graph",
|
126 |
+
"Cosine Similarity",
|
127 |
+
"Paper Generator",
|
128 |
+
"Paper from Topic",
|
129 |
+
"Download Entire Corpus",
|
130 |
+
],
|
131 |
+
)
|
132 |
+
|
133 |
+
if option == "Search Papers":
|
134 |
+
st.subheader("Search and Store Papers")
|
135 |
+
|
136 |
+
topic = st.text_input("Enter research topic")
|
137 |
+
num_papers = st.number_input(
|
138 |
+
"Number of papers", min_value=1, max_value=10, value=5
|
139 |
+
)
|
140 |
+
|
141 |
+
if st.button("Search and Store"):
|
142 |
+
if topic:
|
143 |
+
with st.spinner(f"Searching and storing papers about {topic}..."):
|
144 |
+
results = search_papers(topic, num_papers)
|
145 |
+
if results:
|
146 |
+
st.success(
|
147 |
+
f"Successfully stored {num_papers} papers in MongoDB"
|
148 |
+
)
|
149 |
+
# Display results
|
150 |
+
papers = json.loads(results)
|
151 |
+
for paper in papers:
|
152 |
+
with st.expander(paper["Title"]):
|
153 |
+
for key, value in paper.items():
|
154 |
+
if key != "Title":
|
155 |
+
st.write(f"**{key}:** {value}")
|
156 |
+
else:
|
157 |
+
st.warning("Please enter a research topic")
|
158 |
+
|
159 |
+
# Add MongoDB connection status
|
160 |
+
if st.sidebar.button("Check Database Connection"):
|
161 |
+
try:
|
162 |
+
client.admin.command("ping")
|
163 |
+
print(MONGODB_URI)
|
164 |
+
st.sidebar.success("Connected to MongoDB")
|
165 |
+
except Exception as e:
|
166 |
+
st.sidebar.error(f"MongoDB Connection Error: {str(e)}")
|
167 |
+
elif option == "Single Keyword Search":
|
168 |
+
keywords_database_download.main()
|
169 |
+
elif option == "Multiple Keywords Search":
|
170 |
+
new_keywords.main()
|
171 |
+
elif option == "Knowledge Graph":
|
172 |
+
infranew.main()
|
173 |
+
elif option == "Cosine Similarity":
|
174 |
+
loldude.main()
|
175 |
+
elif option == "Paper Generator":
|
176 |
+
new_research_paper.main()
|
177 |
+
elif option == "Paper from Topic":
|
178 |
+
research3.main()
|
179 |
+
elif option == "Download Entire Corpus":
|
180 |
+
entire_download.main()
|
181 |
+
else:
|
182 |
+
# st.subheader("Blank Page")
|
183 |
+
# st.write("This is a placeholder for alternative content.")
|
184 |
+
research22.main()
|
185 |
+
|
186 |
+
|
187 |
+
if __name__ == "__main__":
|
188 |
+
main()
|
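A minimal, illustrative sketch of the JSON-array cleanup that search_papers above applies to the Perplexity response; the helper name and sample string below are invented for illustration.

import json

def extract_json_array(content: str) -> list:
    # Mirror of the cleanup in search_papers: keep only the [...] portion, then parse it.
    content = content.strip()
    start, end = content.find("["), content.rfind("]")
    if start == -1 or end == -1:
        raise ValueError("No JSON array found in model output")
    return json.loads(content[start : end + 1])

print(extract_json_array('Sure, here you go: [{"Title": "An Example Paper"}] Hope this helps!'))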
research_combine2.py
ADDED
@@ -0,0 +1,269 @@
1 |
+
import new_research_paper
|
2 |
+
import research3
|
3 |
+
import entire_download
|
4 |
+
import streamlit as st
|
5 |
+
import os
|
6 |
+
import json
|
7 |
+
import requests
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
from pymongo import MongoClient
|
10 |
+
from typing import Dict, Any
|
11 |
+
import research22
|
12 |
+
import keywords_database_download
|
13 |
+
import new_keywords
|
14 |
+
import infranew
|
15 |
+
import loldude
|
16 |
+
import new_research_paper
|
17 |
+
import research3
|
18 |
+
import entire_download
|
19 |
+
import sciclone
|
20 |
+
import extract
|
21 |
+
|
22 |
+
# Load environment variables
|
23 |
+
load_dotenv()
|
24 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
25 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
26 |
+
MONGODB_URI = os.getenv(
|
27 |
+
"MONGODB_UR",
|
28 |
+
"mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
|
29 |
+
)
|
30 |
+
|
31 |
+
# MongoDB setup
|
32 |
+
client = MongoClient(MONGODB_URI)
|
33 |
+
db = client["novascholar_db"]
|
34 |
+
|
35 |
+
|
36 |
+
def search_papers(topic: str, num_papers: int, paper_type: str) -> str:
|
37 |
+
headers = {
|
38 |
+
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
39 |
+
"Content-Type": "application/json",
|
40 |
+
}
|
41 |
+
|
42 |
+
attributes = {
|
43 |
+
"Review Based Paper": [
|
44 |
+
"Title",
|
45 |
+
"Publication",
|
46 |
+
"Journal_Conference",
|
47 |
+
"Abstract",
|
48 |
+
"Keywords",
|
49 |
+
"Author",
|
50 |
+
"Date_of_Publication",
|
51 |
+
"Intro",
|
52 |
+
"Literature_Review",
|
53 |
+
"Body",
|
54 |
+
"Protocol",
|
55 |
+
"Search String",
|
56 |
+
"Included Studies",
|
57 |
+
"Data Collection and Analysis Methods",
|
58 |
+
"Data Extraction Table",
|
59 |
+
"Synthesis and Analysis",
|
60 |
+
"Conclusion",
|
61 |
+
"Limitations",
|
62 |
+
"Results",
|
63 |
+
"References",
|
64 |
+
"Risk of Bias Assessment",
|
65 |
+
],
|
66 |
+
"Opinion/Perspective Based Paper": [
|
67 |
+
"Title",
|
68 |
+
"Publication",
|
69 |
+
"Journal_Conference",
|
70 |
+
"Abstract",
|
71 |
+
"Keywords",
|
72 |
+
"Author",
|
73 |
+
"Date_of_Publication",
|
74 |
+
"Intro",
|
75 |
+
"Literature_Review",
|
76 |
+
"Introduction",
|
77 |
+
"Body",
|
78 |
+
"Results and Discussion",
|
79 |
+
"Conclusion",
|
80 |
+
"References",
|
81 |
+
],
|
82 |
+
"Empirical Research Paper": [
|
83 |
+
"Title",
|
84 |
+
"Publication",
|
85 |
+
"Journal_Conference",
|
86 |
+
"Abstract",
|
87 |
+
"Keywords",
|
88 |
+
"Author",
|
89 |
+
"Date_of_Publication",
|
90 |
+
"Intro",
|
91 |
+
"Literature_Review",
|
92 |
+
"Introduction",
|
93 |
+
"Body",
|
94 |
+
"Methodology",
|
95 |
+
"Participants",
|
96 |
+
"Survey Instrument",
|
97 |
+
"Data Collection",
|
98 |
+
"Data Analysis",
|
99 |
+
"Results and Discussion",
|
100 |
+
"Conclusion",
|
101 |
+
"References",
|
102 |
+
],
|
103 |
+
"Research Paper (Other)": [
|
104 |
+
"Title",
|
105 |
+
"Publication",
|
106 |
+
"Journal_Conference",
|
107 |
+
"Abstract",
|
108 |
+
"Keywords",
|
109 |
+
"Author",
|
110 |
+
"Date_of_Publication",
|
111 |
+
"Intro",
|
112 |
+
"Literature_Review",
|
113 |
+
"Research_Models_Used",
|
114 |
+
"Methodology",
|
115 |
+
"Discussion",
|
116 |
+
"Future_Scope",
|
117 |
+
"Theory",
|
118 |
+
"Independent_Variables",
|
119 |
+
"nof_Independent_Variables",
|
120 |
+
"Dependent_Variables",
|
121 |
+
"nof_Dependent_Variables",
|
122 |
+
"Control_Variables",
|
123 |
+
"Extraneous_Variables",
|
124 |
+
"nof_Control_Variables",
|
125 |
+
"nof_Extraneous_Variables",
|
126 |
+
],
|
127 |
+
}
|
128 |
+
|
129 |
+
selected_attributes = attributes[paper_type]
|
130 |
+
prompt = f"""Find {num_papers} recent research papers about {topic}.
|
131 |
+
Return ONLY a valid JSON array with the following structure for each paper, no additional text:
|
132 |
+
[{{
|
133 |
+
{", ".join([f'"{attr}": "value"' for attr in selected_attributes])}
|
134 |
+
}}]"""
|
135 |
+
|
136 |
+
payload = {
|
137 |
+
"model": "llama-3.1-sonar-small-128k-chat",
|
138 |
+
"messages": [
|
139 |
+
{
|
140 |
+
"role": "system",
|
141 |
+
"content": "You are a research paper analyzer that returns only valid JSON arrays.",
|
142 |
+
},
|
143 |
+
{"role": "user", "content": prompt},
|
144 |
+
],
|
145 |
+
"temperature": 0.1,
|
146 |
+
}
|
147 |
+
|
148 |
+
try:
|
149 |
+
response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
|
150 |
+
response.raise_for_status()
|
151 |
+
content = response.json()["choices"][0]["message"]["content"]
|
152 |
+
|
153 |
+
# Clean response and ensure it's valid JSON
|
154 |
+
content = content.strip()
|
155 |
+
if not content.startswith("["):
|
156 |
+
content = content[content.find("[") :]
|
157 |
+
if not content.endswith("]"):
|
158 |
+
content = content[: content.rfind("]") + 1]
|
159 |
+
|
160 |
+
# Validate JSON
|
161 |
+
papers = json.loads(content)
|
162 |
+
if not isinstance(papers, list):
|
163 |
+
raise ValueError("Response is not a JSON array")
|
164 |
+
|
165 |
+
# Insert into MongoDB
|
166 |
+
collection = db[paper_type.replace(" ", "_").lower()]
|
167 |
+
if papers:
|
168 |
+
collection.insert_many(papers)
|
169 |
+
return content
|
170 |
+
return "[]"
|
171 |
+
|
172 |
+
except json.JSONDecodeError as e:
|
173 |
+
st.error(f"Invalid JSON response: {str(e)}")
|
174 |
+
return None
|
175 |
+
except Exception as e:
|
176 |
+
st.error(f"Error: {str(e)}")
|
177 |
+
return None
|
178 |
+
|
179 |
+
|
180 |
+
def display_research_assistant_dashboard():
|
181 |
+
#st.set_page_config(page_title="Research Papers", layout="wide")
|
182 |
+
|
183 |
+
# st.title("Research Papers")
|
184 |
+
|
185 |
+
# Sidebar radio
|
186 |
+
option = st.sidebar.radio(
|
187 |
+
"Select an option",
|
188 |
+
[
|
189 |
+
"Search Papers",
|
190 |
+
"Upload Paper",
|
191 |
+
"Single Keyword Search",
|
192 |
+
"Multiple Keywords Search",
|
193 |
+
"Knowledge Graph",
|
194 |
+
"Cosine Similarity",
|
195 |
+
"Paper Generator",
|
196 |
+
"Paper from Topic",
|
197 |
+
"Download Entire Corpus",
|
198 |
+
"Research Copilot",
|
199 |
+
"Research Paper Analysis Tool",
|
200 |
+
],
|
201 |
+
)
|
202 |
+
|
203 |
+
if option == "Search Papers":
|
204 |
+
st.subheader("Search and Store Papers")
|
205 |
+
|
206 |
+
topic = st.text_input("Enter research topic")
|
207 |
+
num_papers = st.number_input(
|
208 |
+
"Number of papers", min_value=1, max_value=10, value=5
|
209 |
+
)
|
210 |
+
paper_type = st.selectbox(
|
211 |
+
"Select type of research paper",
|
212 |
+
[
|
213 |
+
"Review Based Paper",
|
214 |
+
"Opinion/Perspective Based Paper",
|
215 |
+
"Empirical Research Paper",
|
216 |
+
"Research Paper (Other)",
|
217 |
+
],
|
218 |
+
)
|
219 |
+
|
220 |
+
if st.button("Search and Store"):
|
221 |
+
if topic:
|
222 |
+
with st.spinner(f"Searching and storing papers about {topic}..."):
|
223 |
+
results = search_papers(topic, num_papers, paper_type)
|
224 |
+
if results:
|
225 |
+
st.success(
|
226 |
+
f"Successfully stored {num_papers} papers in MongoDB"
|
227 |
+
)
|
228 |
+
# Display results
|
229 |
+
papers = json.loads(results)
|
230 |
+
for paper in papers:
|
231 |
+
with st.expander(paper["Title"]):
|
232 |
+
for key, value in paper.items():
|
233 |
+
if key != "Title":
|
234 |
+
st.write(f"**{key}:** {value}")
|
235 |
+
else:
|
236 |
+
st.warning("Please enter a research topic")
|
237 |
+
|
238 |
+
# Add MongoDB connection status
|
239 |
+
if st.sidebar.button("Check Database Connection"):
|
240 |
+
try:
|
241 |
+
client.admin.command("ping")
|
242 |
+
print(MONGODB_URI)
|
243 |
+
st.sidebar.success("Connected to MongoDB")
|
244 |
+
except Exception as e:
|
245 |
+
st.sidebar.error(f"MongoDB Connection Error: {str(e)}")
|
246 |
+
elif option == "Single Keyword Search":
|
247 |
+
keywords_database_download.main()
|
248 |
+
elif option == "Multiple Keywords Search":
|
249 |
+
new_keywords.main()
|
250 |
+
elif option == "Knowledge Graph":
|
251 |
+
infranew.main()
|
252 |
+
elif option == "Cosine Similarity":
|
253 |
+
loldude.main()
|
254 |
+
elif option == "Paper Generator":
|
255 |
+
new_research_paper.main()
|
256 |
+
elif option == "Paper from Topic":
|
257 |
+
research3.main()
|
258 |
+
elif option == "Download Entire Corpus":
|
259 |
+
entire_download.main()
|
260 |
+
elif option == "Research Copilot":
|
261 |
+
sciclone.main()
|
262 |
+
elif option == "Research Paper Analysis Tool":
|
263 |
+
extract.main()
|
264 |
+
else:
|
265 |
+
research22.main()
|
266 |
+
|
267 |
+
|
268 |
+
if __name__ == "__main__":
|
269 |
+
display_research_assistant_dashboard()
|
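A short sketch of how search_papers in research_combine2.py assembles its per-type prompt body and its MongoDB collection name; the attribute subset below is shortened for illustration (the full lists are defined in the file).

selected_attributes = ["Title", "Abstract", "Keywords"]  # illustrative subset
body = ", ".join(f'"{attr}": "value"' for attr in selected_attributes)
prompt = f"Return ONLY a valid JSON array, e.g. [{{{body}}}]"
print(prompt)

paper_type = "Review Based Paper"
print(paper_type.replace(" ", "_").lower())  # collection name: review_based_paper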
sciclone.py
ADDED
@@ -0,0 +1,466 @@
1 |
+
import streamlit as st
|
2 |
+
import requests
|
3 |
+
import PyPDF2
|
4 |
+
from typing import Optional, Dict, List
|
5 |
+
import json
|
6 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
+
from concurrent.futures import ThreadPoolExecutor
|
8 |
+
import xml.etree.ElementTree as ET
|
9 |
+
import re
|
10 |
+
from datetime import datetime
|
11 |
+
import time
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
import os
|
14 |
+
import pandas as pd
|
15 |
+
|
16 |
+
# Load environment variables
|
17 |
+
load_dotenv()
|
18 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
19 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
20 |
+
SAPLING_API_KEY = os.getenv("SAPLING_API_KEY")
|
21 |
+
|
22 |
+
|
23 |
+
def call_perplexity_api(prompt: str) -> str:
|
24 |
+
"""Call Perplexity AI with a prompt, return the text response if successful."""
|
25 |
+
headers = {
|
26 |
+
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
27 |
+
"Content-Type": "application/json",
|
28 |
+
}
|
29 |
+
|
30 |
+
payload = {
|
31 |
+
"model": "llama-3.1-sonar-small-128k-chat",
|
32 |
+
"messages": [{"role": "user", "content": prompt}],
|
33 |
+
"temperature": 0.3,
|
34 |
+
}
|
35 |
+
|
36 |
+
try:
|
37 |
+
response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload)
|
38 |
+
response.raise_for_status()
|
39 |
+
return response.json()["choices"][0]["message"]["content"]
|
40 |
+
except Exception as e:
|
41 |
+
st.error(f"API Error: {str(e)}")
|
42 |
+
return ""
|
43 |
+
|
44 |
+
|
45 |
+
def extract_text_from_pdf(pdf_file):
|
46 |
+
"""Extract text content from a PDF file."""
|
47 |
+
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
48 |
+
text = ""
|
49 |
+
for page in pdf_reader.pages:
|
50 |
+
text += page.extract_text() + "\n"
|
51 |
+
return text
|
52 |
+
|
53 |
+
|
54 |
+
def analyze_paper(text: str, category: str) -> str:
|
55 |
+
"""Generate a prompt and get analysis for a specific category."""
|
56 |
+
prompts = {
|
57 |
+
"Summarized Abstract": "Extract and summarize the abstract from this research paper:",
|
58 |
+
"Results": "What are the main results and findings from this research paper:",
|
59 |
+
"Summarized Introduction": "Summarize the introduction section of this research paper:",
|
60 |
+
"Methods Used": "What are the main methods and methodologies used in this research:",
|
61 |
+
"Literature Survey": "Summarize the literature review or related work from this paper:",
|
62 |
+
"Limitations": "What are the limitations mentioned in this research:",
|
63 |
+
"Contributions": "What are the main contributions of this research:",
|
64 |
+
"Practical Implications": "What are the practical implications of this research:",
|
65 |
+
"Objectives": "What are the main objectives of this research:",
|
66 |
+
"Findings": "What are the key findings from this research:",
|
67 |
+
"Future Research": "What future research directions are suggested in this paper:",
|
68 |
+
"Dependent Variables": "What are the dependent variables studied in this research:",
|
69 |
+
"Independent Variables": "What are the independent variables studied in this research:",
|
70 |
+
"Dataset": "What dataset(s) were used in this research:",
|
71 |
+
"Problem Statement": "What is the main problem statement or research question:",
|
72 |
+
"Challenges": "What challenges were faced or addressed in this research:",
|
73 |
+
"Applications": "What are the potential applications of this research:",
|
74 |
+
}
|
75 |
+
|
76 |
+
prompt = f"{prompts[category]}\n\nPaper text: {text[:5000]}" # Limit text to avoid token limits
|
77 |
+
return call_perplexity_api(prompt)
|
78 |
+
|
79 |
+
|
80 |
+
class ResearchAssistant:
|
81 |
+
def __init__(self, perplexity_key: str):
|
82 |
+
self.perplexity_key = perplexity_key
|
83 |
+
|
84 |
+
def chat_with_pdf(self, pdf_text: str, query: str) -> Dict:
|
85 |
+
chunks = self._split_text(pdf_text)
|
86 |
+
relevant_chunks = self._get_relevant_chunks(chunks, query)
|
87 |
+
|
88 |
+
prompt = f"Context from PDF:\n\n{relevant_chunks}\n\nQuestion: {query}"
|
89 |
+
response_text = call_perplexity_api(prompt)
|
90 |
+
return {"choices": [{"message": {"content": response_text}}]}
|
91 |
+
|
92 |
+
def generate_literature_review(self, topic: str) -> Dict:
|
93 |
+
try:
|
94 |
+
# Search arXiv for papers
|
95 |
+
papers = self._search_arxiv(topic)
|
96 |
+
if not papers:
|
97 |
+
return {"error": "No papers found on the topic"}
|
98 |
+
|
99 |
+
# Format paper information
|
100 |
+
papers_summary = "\n\n".join(
|
101 |
+
[
|
102 |
+
f"Paper: {p['title']}\nAuthors: {', '.join(p['authors'])}\nSummary: {p['summary']}"
|
103 |
+
for p in papers
|
104 |
+
]
|
105 |
+
)
|
106 |
+
|
107 |
+
prompt = f"""Generate a comprehensive literature review on '{topic}'. Based on these papers:
|
108 |
+
|
109 |
+
{papers_summary}
|
110 |
+
|
111 |
+
Structure the review as follows:
|
112 |
+
1. Introduction and Background
|
113 |
+
2. Current Research Trends
|
114 |
+
3. Key Findings and Themes
|
115 |
+
4. Research Gaps
|
116 |
+
5. Future Directions"""
|
117 |
+
|
118 |
+
response_text = call_perplexity_api(prompt)
|
119 |
+
return {"choices": [{"message": {"content": response_text}}]}
|
120 |
+
except Exception as e:
|
121 |
+
return {"error": f"Literature review generation failed: {str(e)}"}
|
122 |
+
|
123 |
+
def ai_writer(self, outline: str, references: List[str]) -> Dict:
|
124 |
+
prompt = f"""Write a research paper following this structure:
|
125 |
+
|
126 |
+
Outline:
|
127 |
+
{outline}
|
128 |
+
|
129 |
+
References to incorporate:
|
130 |
+
{json.dumps(references)}
|
131 |
+
|
132 |
+
Instructions:
|
133 |
+
- Follow academic writing style
|
134 |
+
- Include appropriate citations
|
135 |
+
- Maintain logical flow
|
136 |
+
- Include introduction and conclusion"""
|
137 |
+
|
138 |
+
response_text = call_perplexity_api(prompt)
|
139 |
+
return {"choices": [{"message": {"content": response_text}}]}
|
140 |
+
|
141 |
+
def refine_response(self, response: str, column: str) -> str:
|
142 |
+
prompt = f"""Refine the following response to fit the '{column}' column in a research paper CSV format:
|
143 |
+
|
144 |
+
Response: {response}
|
145 |
+
|
146 |
+
Ensure the response is clear, concise, and fits the context of the column."""
|
147 |
+
|
148 |
+
refined_response = call_perplexity_api(prompt)
|
149 |
+
return refined_response
|
150 |
+
|
151 |
+
def paraphrase(self, text: str) -> Dict:
|
152 |
+
prompt = f"""Paraphrase the following text while:
|
153 |
+
- Maintaining academic tone
|
154 |
+
- Preserving key meaning
|
155 |
+
- Improving clarity
|
156 |
+
|
157 |
+
Text: {text}"""
|
158 |
+
|
159 |
+
response_text = call_perplexity_api(prompt)
|
160 |
+
return {"choices": [{"message": {"content": response_text}}]}
|
161 |
+
|
162 |
+
def generate_citation(self, paper_info: Dict, style: str = "APA") -> Dict:
|
163 |
+
prompt = f"""Generate a {style} citation for:
|
164 |
+
Title: {paper_info['title']}
|
165 |
+
Authors: {', '.join(paper_info['authors'])}
|
166 |
+
Year: {paper_info['year']}
|
167 |
+
|
168 |
+
Follow exact {style} format guidelines."""
|
169 |
+
|
170 |
+
response_text = call_perplexity_api(prompt)
|
171 |
+
return {"citation": response_text}
|
172 |
+
|
173 |
+
def detect_ai_content(self, text: str) -> Dict:
|
174 |
+
prompt = f"""You are an AI content detector. Analyze the text for:
|
175 |
+
1. Writing style consistency
|
176 |
+
2. Language patterns
|
177 |
+
3. Contextual coherence
|
178 |
+
4. Common AI patterns
|
179 |
+
Provide a clear analysis with confidence level.
|
180 |
+
|
181 |
+
Text: {text}"""
|
182 |
+
|
183 |
+
response = requests.post(
|
184 |
+
"https://api.sapling.ai/api/v1/aidetect",
|
185 |
+
json={"key": SAPLING_API_KEY, "text": text},
|
186 |
+
)
|
187 |
+
st.info(
|
188 |
+
"A score from 0 to 1 will be returned, with 0 indicating the maximum confidence that the text is human-written, and 1 indicating the maximum confidence that the text is AI-generated."
|
189 |
+
)
|
190 |
+
|
191 |
+
if response.status_code == 200:
|
192 |
+
return {"choices": [{"message": {"content": response.json()}}]}
|
193 |
+
else:
|
194 |
+
return {
|
195 |
+
"error": f"Sapling API Error: {response.status_code} - {response.text}"
|
196 |
+
}
|
197 |
+
|
198 |
+
def _split_text(self, text: str) -> List[str]:
|
199 |
+
splitter = RecursiveCharacterTextSplitter(
|
200 |
+
chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", ". ", " ", ""]
|
201 |
+
)
|
202 |
+
return splitter.split_text(text)
|
203 |
+
|
204 |
+
def _get_relevant_chunks(self, chunks: List[str], query: str) -> str:
|
205 |
+
# Simple keyword-based relevance scoring
|
206 |
+
query_words = set(query.lower().split())
|
207 |
+
scored_chunks = []
|
208 |
+
|
209 |
+
for chunk in chunks:
|
210 |
+
chunk_words = set(chunk.lower().split())
|
211 |
+
score = len(query_words.intersection(chunk_words))
|
212 |
+
scored_chunks.append((score, chunk))
|
213 |
+
|
214 |
+
scored_chunks.sort(reverse=True)
|
215 |
+
return "\n\n".join(chunk for _, chunk in scored_chunks[:3])
|
216 |
+
|
217 |
+
def _search_arxiv(self, topic: str) -> List[Dict]:
|
218 |
+
try:
|
219 |
+
query = "+AND+".join(topic.split())
|
220 |
+
url = f"http://export.arxiv.org/api/query?search_query=all:{query}&start=0&max_results=5"
|
221 |
+
response = requests.get(url, timeout=10)
|
222 |
+
response.raise_for_status()
|
223 |
+
return self._parse_arxiv_response(response.text)
|
224 |
+
except Exception as e:
|
225 |
+
print(f"arXiv search failed: {str(e)}")
|
226 |
+
return []
|
227 |
+
|
228 |
+
def _parse_arxiv_response(self, response_text: str) -> List[Dict]:
|
229 |
+
try:
|
230 |
+
root = ET.fromstring(response_text)
|
231 |
+
papers = []
|
232 |
+
for entry in root.findall("{http://www.w3.org/2005/Atom}entry"):
|
233 |
+
paper = {
|
234 |
+
"id": entry.find("{http://www.w3.org/2005/Atom}id").text,
|
235 |
+
"title": entry.find(
|
236 |
+
"{http://www.w3.org/2005/Atom}title"
|
237 |
+
).text.strip(),
|
238 |
+
"summary": entry.find(
|
239 |
+
"{http://www.w3.org/2005/Atom}summary"
|
240 |
+
).text.strip(),
|
241 |
+
"authors": [
|
242 |
+
author.find("{http://www.w3.org/2005/Atom}name").text.strip()
|
243 |
+
for author in entry.findall(
|
244 |
+
"{http://www.w3.org/2005/Atom}author"
|
245 |
+
)
|
246 |
+
],
|
247 |
+
"published": entry.find(
|
248 |
+
"{http://www.w3.org/2005/Atom}published"
|
249 |
+
).text[:10],
|
250 |
+
}
|
251 |
+
papers.append(paper)
|
252 |
+
return papers
|
253 |
+
except Exception as e:
|
254 |
+
print(f"arXiv response parsing failed: {str(e)}")
|
255 |
+
return []
|
256 |
+
|
257 |
+
|
258 |
+
def main():
|
259 |
+
# st.set_page_config(page_title="Research Assistant", layout="wide")
|
260 |
+
st.title("Research Copilot")
|
261 |
+
|
262 |
+
if not PERPLEXITY_API_KEY:
|
263 |
+
st.warning("Perplexity API key not found in environment variables.")
|
264 |
+
return
|
265 |
+
|
266 |
+
assistant = ResearchAssistant(PERPLEXITY_API_KEY)
|
267 |
+
|
268 |
+
tabs = st.tabs(
|
269 |
+
[
|
270 |
+
"Chat with PDF",
|
271 |
+
"Literature Review",
|
272 |
+
"AI Writer",
|
273 |
+
"Extract Data",
|
274 |
+
"Paraphraser",
|
275 |
+
"Citation Generator",
|
276 |
+
"AI Detector",
|
277 |
+
]
|
278 |
+
)
|
279 |
+
|
280 |
+
with tabs[0]: # Chat with PDF
|
281 |
+
st.header("Chat with PDF")
|
282 |
+
|
283 |
+
# File uploader with clear button
|
284 |
+
col1, col2 = st.columns([3, 1])
|
285 |
+
with col1:
|
286 |
+
uploaded_file = st.file_uploader("Upload PDF", type="pdf", key="pdf_chat")
|
287 |
+
with col2:
|
288 |
+
if st.button("Clear PDF"):
|
289 |
+
st.session_state.pop("pdf_text", None)
|
290 |
+
st.rerun()
|
291 |
+
|
292 |
+
if uploaded_file:
|
293 |
+
if "pdf_text" not in st.session_state:
|
294 |
+
with st.spinner("Processing PDF..."):
|
295 |
+
reader = PyPDF2.PdfReader(uploaded_file)
|
296 |
+
st.session_state.pdf_text = ""
|
297 |
+
for page in reader.pages:
|
298 |
+
st.session_state.pdf_text += page.extract_text()
|
299 |
+
st.success("PDF processed successfully!")
|
300 |
+
|
301 |
+
query = st.text_input("Ask a question about the PDF")
|
302 |
+
if query:
|
303 |
+
with st.spinner("Analyzing..."):
|
304 |
+
response = assistant.chat_with_pdf(st.session_state.pdf_text, query)
|
305 |
+
if "error" in response:
|
306 |
+
st.error(response["error"])
|
307 |
+
else:
|
308 |
+
st.write(response["choices"][0]["message"]["content"])
|
309 |
+
|
310 |
+
with tabs[1]: # Literature Review
|
311 |
+
st.header("Literature Review")
|
312 |
+
topic = st.text_input("Enter research topic")
|
313 |
+
if st.button("Generate Review") and topic:
|
314 |
+
with st.spinner("Generating literature review..."):
|
315 |
+
review = assistant.generate_literature_review(topic)
|
316 |
+
if "error" in review:
|
317 |
+
st.error(review["error"])
|
318 |
+
else:
|
319 |
+
st.write(review["choices"][0]["message"]["content"])
|
320 |
+
|
321 |
+
with tabs[2]: # AI Writer
|
322 |
+
st.header("AI Writer")
|
323 |
+
outline = st.text_area("Enter paper outline")
|
324 |
+
references = st.text_area("Enter references (one per line)")
|
325 |
+
if st.button("Generate Paper") and outline:
|
326 |
+
with st.spinner("Writing paper..."):
|
327 |
+
paper = assistant.ai_writer(outline, references.split("\n"))
|
328 |
+
if "error" in paper:
|
329 |
+
st.error(paper["error"])
|
330 |
+
else:
|
331 |
+
st.write(paper["choices"][0]["message"]["content"])
|
332 |
+
|
333 |
+
with tabs[3]: # Extract Data
|
334 |
+
st.header("Extract Data")
|
335 |
+
|
336 |
+
uploaded_files = st.file_uploader(
|
337 |
+
"Upload multiple PDF files", type="pdf", accept_multiple_files=True
|
338 |
+
)
|
339 |
+
|
340 |
+
if uploaded_files:
|
341 |
+
if st.button("Process Papers"):
|
342 |
+
# Initialize progress bar
|
343 |
+
progress_bar = st.progress(0)
|
344 |
+
status_text = st.empty()
|
345 |
+
|
346 |
+
# Initialize results dictionary
|
347 |
+
results = []
|
348 |
+
|
349 |
+
# Define categories
|
350 |
+
categories = [
|
351 |
+
"Summarized Abstract",
|
352 |
+
"Results",
|
353 |
+
"Summarized Introduction",
|
354 |
+
"Methods Used",
|
355 |
+
"Literature Survey",
|
356 |
+
"Limitations",
|
357 |
+
"Contributions",
|
358 |
+
"Practical Implications",
|
359 |
+
"Objectives",
|
360 |
+
"Findings",
|
361 |
+
"Future Research",
|
362 |
+
"Dependent Variables",
|
363 |
+
"Independent Variables",
|
364 |
+
"Dataset",
|
365 |
+
"Problem Statement",
|
366 |
+
"Challenges",
|
367 |
+
"Applications",
|
368 |
+
]
|
369 |
+
|
370 |
+
# Process each file
|
371 |
+
for i, file in enumerate(uploaded_files):
|
372 |
+
status_text.text(f"Processing {file.name}...")
|
373 |
+
|
374 |
+
# Extract text from PDF
|
375 |
+
text = extract_text_from_pdf(file)
|
376 |
+
|
377 |
+
# Initialize paper results
|
378 |
+
paper_results = {"Filename": file.name}
|
379 |
+
|
380 |
+
# Analyze each category
|
381 |
+
for j, category in enumerate(categories):
|
382 |
+
status_text.text(f"Processing {file.name} - {category}")
|
383 |
+
paper_results[category] = analyze_paper(text, category)
|
384 |
+
|
385 |
+
# Update progress
|
386 |
+
progress = (i * len(categories) + j + 1) / (
|
387 |
+
len(uploaded_files) * len(categories)
|
388 |
+
)
|
389 |
+
progress_bar.progress(progress)
|
390 |
+
|
391 |
+
# Add small delay to avoid API rate limits
|
392 |
+
time.sleep(1)
|
393 |
+
|
394 |
+
results.append(paper_results)
|
395 |
+
|
396 |
+
# Create DataFrame
|
397 |
+
df = pd.DataFrame(results)
|
398 |
+
|
399 |
+
# Convert DataFrame to CSV
|
400 |
+
csv = df.to_csv(index=False)
|
401 |
+
|
402 |
+
# Create download button
|
403 |
+
st.download_button(
|
404 |
+
label="Download Results as CSV",
|
405 |
+
data=csv,
|
406 |
+
file_name="research_papers_analysis.csv",
|
407 |
+
mime="text/csv",
|
408 |
+
)
|
409 |
+
|
410 |
+
# Display results in the app
|
411 |
+
st.subheader("Analysis Results")
|
412 |
+
st.dataframe(df)
|
413 |
+
|
414 |
+
status_text.text("Processing complete!")
|
415 |
+
progress_bar.progress(1.0)
|
416 |
+
|
417 |
+
with tabs[4]: # Paraphraser
|
418 |
+
st.header("Paraphraser")
|
419 |
+
text = st.text_area("Enter text to paraphrase")
|
420 |
+
if st.button("Paraphrase") and text:
|
421 |
+
with st.spinner("Paraphrasing..."):
|
422 |
+
result = assistant.paraphrase(text)
|
423 |
+
if "error" in result:
|
424 |
+
st.error(result["error"])
|
425 |
+
else:
|
426 |
+
st.write(result["choices"][0]["message"]["content"])
|
427 |
+
|
428 |
+
with tabs[5]: # Citation Generator
|
429 |
+
st.header("Citation Generator")
|
430 |
+
col1, col2 = st.columns(2)
|
431 |
+
with col1:
|
432 |
+
title = st.text_input("Paper Title")
|
433 |
+
authors = st.text_input("Authors (comma-separated)")
|
434 |
+
with col2:
|
435 |
+
year = st.text_input("Year")
|
436 |
+
style = st.selectbox("Citation Style", ["APA", "MLA", "Chicago"])
|
437 |
+
|
438 |
+
if st.button("Generate Citation") and title:
|
439 |
+
with st.spinner("Generating citation..."):
|
440 |
+
citation = assistant.generate_citation(
|
441 |
+
{
|
442 |
+
"title": title,
|
443 |
+
"authors": [a.strip() for a in authors.split(",")],
|
444 |
+
"year": year,
|
445 |
+
},
|
446 |
+
style,
|
447 |
+
)
|
448 |
+
if "error" in citation:
|
449 |
+
st.error(citation["error"])
|
450 |
+
else:
|
451 |
+
st.code(citation["citation"], language="text")
|
452 |
+
|
453 |
+
with tabs[6]: # AI Detector
|
454 |
+
st.header("AI Detector")
|
455 |
+
text = st.text_area("Enter text to analyze")
|
456 |
+
if st.button("Detect AI Content") and text:
|
457 |
+
with st.spinner("Analyzing..."):
|
458 |
+
result = assistant.detect_ai_content(text)
|
459 |
+
if "error" in result:
|
460 |
+
st.error(result["error"])
|
461 |
+
else:
|
462 |
+
st.write(result["choices"][0]["message"]["content"])
|
463 |
+
|
464 |
+
|
465 |
+
if __name__ == "__main__":
|
466 |
+
main()
|
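A self-contained sketch of the keyword-overlap ranking used by ResearchAssistant._get_relevant_chunks above; the chunks and query are invented examples.

def rank_chunks(chunks, query, top_k=3):
    # Score each chunk by how many query words it shares, highest score first.
    query_words = set(query.lower().split())
    scored = sorted(
        ((len(query_words & set(chunk.lower().split())), chunk) for chunk in chunks),
        key=lambda pair: pair[0],
        reverse=True,
    )
    return [chunk for _, chunk in scored[:top_k]]

chunks = [
    "Attention lets transformers weigh tokens by relevance.",
    "Convolutions slide fixed kernels over the input.",
    "Self-attention cost grows quadratically with sequence length.",
]
print(rank_chunks(chunks, "how does attention weigh tokens", top_k=2))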
session_page.py
ADDED
The diff for this file is too large to render.
|
|
ui.py
ADDED
@@ -0,0 +1,111 @@
1 |
+
import streamlit as st
|
2 |
+
from streamlit_option_menu import option_menu
|
3 |
+
|
4 |
+
|
5 |
+
# Page Configuration
|
6 |
+
st.set_page_config(page_title="Enhanced Navigation Demo", layout="wide")
|
7 |
+
|
8 |
+
# Top Navigation Bar using option_menu
|
9 |
+
selected = option_menu(
|
10 |
+
menu_title=None,
|
11 |
+
options=["Home", "Documentation", "Examples", "Community", "About"],
|
12 |
+
icons=["house", "book", "code", "people", "info-circle"],
|
13 |
+
menu_icon="cast",
|
14 |
+
default_index=0,
|
15 |
+
orientation="horizontal",
|
16 |
+
styles={
|
17 |
+
"container": {"padding": "0!important", "background-color": "#fafafa"},
|
18 |
+
"icon": {"color": "orange", "font-size": "25px"},
|
19 |
+
"nav-link": {
|
20 |
+
"font-size": "15px",
|
21 |
+
"text-align": "center",
|
22 |
+
"margin":"0px",
|
23 |
+
"--hover-color": "#eee",
|
24 |
+
},
|
25 |
+
"nav-link-selected": {"background-color": "#0083B8"},
|
26 |
+
}
|
27 |
+
)
|
28 |
+
|
29 |
+
# Sidebar Navigation
|
30 |
+
with st.sidebar:
|
31 |
+
st.header("Navigation Menu")
|
32 |
+
|
33 |
+
# Main Menu Items
|
34 |
+
selected_side = option_menu(
|
35 |
+
menu_title="Go to",
|
36 |
+
options=["Dashboard", "Analytics", "Reports", "Settings"],
|
37 |
+
icons=["speedometer2", "graph-up", "file-text", "gear"],
|
38 |
+
menu_icon="list",
|
39 |
+
default_index=0,
|
40 |
+
)
|
41 |
+
|
42 |
+
# Expandable Reports Section
|
43 |
+
if selected_side == "Reports":
|
44 |
+
with st.expander("Reports", expanded=True):
|
45 |
+
st.button("Weekly Report")
|
46 |
+
st.button("Monthly Report")
|
47 |
+
st.button("Annual Report")
|
48 |
+
|
49 |
+
# Main Content Area based on top navigation
|
50 |
+
if selected == "Home":
|
51 |
+
st.title("Welcome to Home")
|
52 |
+
st.write("This is the home page content.")
|
53 |
+
|
54 |
+
# Dashboard Content
|
55 |
+
st.header("Dashboard")
|
56 |
+
col1, col2, col3 = st.columns(3)
|
57 |
+
with col1:
|
58 |
+
st.metric("Sales", "$12,345", "+2.5%")
|
59 |
+
with col2:
|
60 |
+
st.metric("Users", "1,234", "-8%")
|
61 |
+
with col3:
|
62 |
+
st.metric("Conversion", "3.2%", "+1.2%")
|
63 |
+
|
64 |
+
elif selected == "Documentation":
|
65 |
+
st.title("Documentation")
|
66 |
+
st.write("Documentation content goes here.")
|
67 |
+
|
68 |
+
elif selected == "Examples":
|
69 |
+
st.title("Examples")
|
70 |
+
st.write("Example content goes here.")
|
71 |
+
|
72 |
+
elif selected == "Community":
|
73 |
+
st.title("Community")
|
74 |
+
st.write("Community content goes here.")
|
75 |
+
|
76 |
+
elif selected == "About":
|
77 |
+
st.title("About")
|
78 |
+
st.write("About content goes here.")
|
79 |
+
|
80 |
+
# Content based on sidebar selection
|
81 |
+
if selected_side == "Analytics":
|
82 |
+
st.header("Analytics")
|
83 |
+
st.line_chart({"data": [1, 5, 2, 6, 2, 1]})
|
84 |
+
elif selected_side == "Settings":
|
85 |
+
st.header("Settings")
|
86 |
+
st.toggle("Dark Mode")
|
87 |
+
st.toggle("Notifications")
|
88 |
+
st.slider("Volume", 0, 100, 50)
|
89 |
+
|
90 |
+
# Footer
|
91 |
+
st.markdown(
|
92 |
+
"""
|
93 |
+
<style>
|
94 |
+
.footer {
|
95 |
+
position: fixed;
|
96 |
+
left: 0;
|
97 |
+
bottom: 0;
|
98 |
+
width: 100%;
|
99 |
+
background-color: #0E1117;
|
100 |
+
color: white;
|
101 |
+
text-align: center;
|
102 |
+
padding: 10px;
|
103 |
+
font-size: 14px;
|
104 |
+
}
|
105 |
+
</style>
|
106 |
+
<div class='footer'>
|
107 |
+
© 2024 Your App Name • Privacy Policy • Terms of Service
|
108 |
+
</div>
|
109 |
+
""",
|
110 |
+
unsafe_allow_html=True
|
111 |
+
)
|
utils/helpers.py
ADDED
@@ -0,0 +1,83 @@
1 |
+
from datetime import datetime, timedelta
|
2 |
+
import streamlit as st
|
3 |
+
|
4 |
+
def format_datetime(dt):
|
5 |
+
"""Format datetime for display"""
|
6 |
+
return dt.strftime("%Y-%m-%d %H:%M")
|
7 |
+
|
8 |
+
def get_session_progress(username, course_id, session_id):
|
9 |
+
"""
|
10 |
+
Get user's progress for a specific session
|
11 |
+
Returns dict with pre_class, in_class, and post_class completion status
|
12 |
+
"""
|
13 |
+
# Demo implementation - replace with actual database queries
|
14 |
+
return {
|
15 |
+
'pre_class': {
|
16 |
+
'completed': True,
|
17 |
+
'last_access': datetime.now() - timedelta(days=1),
|
18 |
+
'resources_viewed': 3,
|
19 |
+
'total_resources': 3
|
20 |
+
},
|
21 |
+
'in_class': {
|
22 |
+
'completed': False,
|
23 |
+
'attendance': True,
|
24 |
+
'quiz_completed': False,
|
25 |
+
'questions_asked': 5
|
26 |
+
},
|
27 |
+
'post_class': {
|
28 |
+
'completed': False,
|
29 |
+
'assignments_submitted': 1,
|
30 |
+
'total_assignments': 2,
|
31 |
+
'grade': None
|
32 |
+
}
|
33 |
+
}
|
34 |
+
|
35 |
+
def get_course_sessions(course_id):
|
36 |
+
"""Get all sessions for a course"""
|
37 |
+
# Demo implementation - replace with database query
|
38 |
+
return [
|
39 |
+
{
|
40 |
+
'id': i + 3,
|
41 |
+
'title': 'Introduction to Programming Concepts',
|
42 |
+
'date': datetime.now() + timedelta(days=i),
|
43 |
+
'status': 'completed' if i < 0 else 'upcoming'
|
44 |
+
}
|
45 |
+
for i in range(-2, 5)
|
46 |
+
]
|
47 |
+
|
48 |
+
def display_progress_bar(completed, total, text=""):
|
49 |
+
"""Display a progress bar with text"""
|
50 |
+
progress = completed / total if total > 0 else 0
|
51 |
+
st.progress(progress)
|
52 |
+
st.text(f"{text}: {completed}/{total} ({progress*100:.1f}%)")
|
53 |
+
|
54 |
+
def create_notification(message, type="info"):
|
55 |
+
"""Create a notification message"""
|
56 |
+
if type == "success":
|
57 |
+
st.success(message)
|
58 |
+
elif type == "error":
|
59 |
+
st.error(message)
|
60 |
+
elif type == "warning":
|
61 |
+
st.warning(message)
|
62 |
+
else:
|
63 |
+
st.info(message)
|
64 |
+
|
65 |
+
class SessionManager:
|
66 |
+
"""Manage session state and navigation"""
|
67 |
+
@staticmethod
|
68 |
+
def get_current_session():
|
69 |
+
"""Get current session information"""
|
70 |
+
if 'current_session' not in st.session_state:
|
71 |
+
st.session_state.current_session = 1
|
72 |
+
return st.session_state.current_session
|
73 |
+
|
74 |
+
@staticmethod
|
75 |
+
def set_current_session(session_id):
|
76 |
+
"""Set current session"""
|
77 |
+
st.session_state.current_session = session_id
|
78 |
+
|
79 |
+
@staticmethod
|
80 |
+
def clear_session():
|
81 |
+
"""Clear session state"""
|
82 |
+
for key in list(st.session_state.keys()):
|
83 |
+
del st.session_state[key]
|
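An illustrative consumer of get_session_progress above; the identifiers are placeholders and the numbers printed are the demo values hard-coded in this file. Assumes the utils/ package layout shown in this commit.

from utils.helpers import get_session_progress

progress = get_session_progress("student1", "CS101", 1)  # placeholder identifiers
viewed = progress['pre_class']['resources_viewed']
total = progress['pre_class']['total_resources']
print(f"Pre-class: {viewed}/{total} resources viewed")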
utils/sample_data.py
ADDED
@@ -0,0 +1,226 @@
1 |
+
from datetime import datetime, timedelta
|
2 |
+
|
3 |
+
SAMPLE_COURSES = [
|
4 |
+
{
|
5 |
+
'course_id': 'CS101',
|
6 |
+
'title': 'Introduction to Computer Science',
|
7 |
+
'description': 'This course covers the basics of computer science and programming.',
|
8 |
+
'instructor': 'Dr. John Doe',
|
9 |
+
'duration': '10 weeks'
|
10 |
+
},
|
11 |
+
{
|
12 |
+
'course_id': 'CS102',
|
13 |
+
'title': 'Data Structures and Algorithms',
|
14 |
+
'description': 'This course introduces data structures and algorithms for efficient data processing.',
|
15 |
+
'instructor': 'Dr. Jane Smith',
|
16 |
+
'duration': '12 weeks'
|
17 |
+
},
|
18 |
+
{
|
19 |
+
'course_id': 'CS103',
|
20 |
+
'title': 'Advanced Python Programming',
|
21 |
+
'description': 'This course covers advanced topics in Python programming, including file handling and exception management.',
|
22 |
+
'instructor': 'Dr. Emily Johnson',
|
23 |
+
'duration': '8 weeks'
|
24 |
+
}
|
25 |
+
]
|
26 |
+
|
27 |
+
SAMPLE_SESSIONS = [
|
28 |
+
{
|
29 |
+
'id': 1,
|
30 |
+
'course_id': 'CS101',
|
31 |
+
'title': 'Introduction to Programming Fundamentals',
|
32 |
+
'date': datetime.now() - timedelta(days=7),
|
33 |
+
'status': 'completed',
|
34 |
+
'pre_class': {
|
35 |
+
'resources': [
|
36 |
+
{'type': 'pdf', 'title': 'Introduction to Python Basics', 'url': '/assets/python_basics.pdf'},
|
37 |
+
{'type': 'video', 'title': 'Programming Fundamentals', 'duration': '15:00'},
|
38 |
+
{'type': 'reading', 'title': 'Chapter 1: Getting Started', 'pages': '1-15'}
|
39 |
+
],
|
40 |
+
'completion_required': True
|
41 |
+
},
|
42 |
+
'in_class': {
|
43 |
+
'topics': ['Variables', 'Data Types', 'Basic Operations'],
|
44 |
+
'quiz': {
|
45 |
+
'title': 'Python Basics Quiz',
|
46 |
+
'questions': 5,
|
47 |
+
'duration': 15
|
48 |
+
},
|
49 |
+
'polls': [
|
50 |
+
{'question': 'How comfortable are you with Python syntax?', 'options': ['Very', 'Somewhat', 'Not at all']}
|
51 |
+
]
|
52 |
+
},
|
53 |
+
'post_class': {
|
54 |
+
'assignments': [
|
55 |
+
{
|
56 |
+
'id': 1,
|
57 |
+
'title': 'Basic Python Programs',
|
58 |
+
'due_date': datetime.now() + timedelta(days=2),
|
59 |
+
'status': 'pending'
|
60 |
+
}
|
61 |
+
]
|
62 |
+
}
|
63 |
+
},
|
64 |
+
{
|
65 |
+
'id': 2,
|
66 |
+
'course_id': 'CS101',
|
67 |
+
'title': 'Control Flow and Functions',
|
68 |
+
'date': datetime.now() - timedelta(days=3),
|
69 |
+
'status': 'completed',
|
70 |
+
'pre_class': {
|
71 |
+
'resources': [
|
72 |
+
{'type': 'pdf', 'title': 'Control Flow in Python', 'url': '/assets/control_flow.pdf'},
|
73 |
+
{'type': 'video', 'title': 'Functions and Methods', 'duration': '20:00'}
|
74 |
+
],
|
75 |
+
'completion_required': True
|
76 |
+
},
|
77 |
+
'in_class': {
|
78 |
+
'topics': ['If-else statements', 'Loops', 'Function definitions'],
|
79 |
+
'quiz': {
|
80 |
+
'title': 'Control Flow Quiz',
|
81 |
+
'questions': 8,
|
82 |
+
'duration': 20
|
83 |
+
},
|
84 |
+
'polls': [
|
85 |
+
{'question': 'Which loop type do you find more intuitive?', 'options': ['For loops', 'While loops', 'Both']}
|
86 |
+
]
|
87 |
+
},
|
88 |
+
'post_class': {
|
89 |
+
'assignments': [
|
90 |
+
{
|
91 |
+
'id': 2,
|
92 |
+
'title': 'Function Implementation Exercise',
|
93 |
+
'due_date': datetime.now() + timedelta(days=4),
|
94 |
+
'status': 'pending'
|
95 |
+
}
|
96 |
+
]
|
97 |
+
}
|
98 |
+
},
|
99 |
+
{
|
100 |
+
'id': 3,
|
101 |
+
'course_id': 'CS102',
|
102 |
+
'title': 'Data Structures',
|
103 |
+
'date': datetime.now(),
|
104 |
+
'status': 'in_progress',
|
105 |
+
'pre_class': {
|
106 |
+
'resources': [
|
107 |
+
{'type': 'pdf', 'title': 'Python Data Structures', 'url': '/assets/data_structures.pdf'},
|
108 |
+
{'type': 'video', 'title': 'Lists and Dictionaries', 'duration': '25:00'}
|
109 |
+
],
|
110 |
+
'completion_required': True
|
111 |
+
},
|
112 |
+
'in_class': {
|
113 |
+
'topics': ['Lists', 'Tuples', 'Dictionaries', 'Sets'],
|
114 |
+
'quiz': {
|
115 |
+
'title': 'Data Structures Quiz',
|
116 |
+
'questions': 10,
|
117 |
+
'duration': 25
|
118 |
+
},
|
119 |
+
'polls': [
|
120 |
+
{'question': 'Which data structure do you use most often?', 'options': ['Lists', 'Dictionaries', 'Sets', 'Tuples']}
|
121 |
+
]
|
122 |
+
},
|
123 |
+
'post_class': {
|
124 |
+
'assignments': [
|
125 |
+
{
|
126 |
+
'id': 3,
|
127 |
+
'title': 'Data Structure Implementation',
|
128 |
+
'due_date': datetime.now() + timedelta(days=7),
|
129 |
+
'status': 'not_started'
|
130 |
+
}
|
131 |
+
]
|
132 |
+
}
|
133 |
+
},
|
134 |
+
{
|
135 |
+
'id': 4,
|
136 |
+
'course_id': 'CS101',
|
137 |
+
'title': 'Object-Oriented Programming',
|
138 |
+
'date': datetime.now() + timedelta(days=4),
|
139 |
+
'status': 'upcoming',
|
140 |
+
'pre_class': {
|
141 |
+
'resources': [
|
142 |
+
{'type': 'pdf', 'title': 'OOP Concepts', 'url': '/assets/oop_concepts.pdf'},
|
143 |
+
{'type': 'video', 'title': 'Classes and Objects', 'duration': '30:00'}
|
144 |
+
],
|
145 |
+
'completion_required': True
|
146 |
+
},
|
147 |
+
'in_class': {
|
148 |
+
'topics': ['Classes', 'Objects', 'Inheritance', 'Polymorphism'],
|
149 |
+
'quiz': {
|
150 |
+
'title': 'OOP Concepts Quiz',
|
151 |
+
'questions': 12,
|
152 |
+
'duration': 30
|
153 |
+
},
|
154 |
+
'polls': [
|
155 |
+
{'question': 'Have you used OOP before?', 'options': ['Yes', 'No', 'Not sure'], 'responses': {'Yes': 12, 'No': 8, 'Not sure': 10}}
|
156 |
+
]
|
157 |
+
},
|
158 |
+
'post_class': {
|
159 |
+
'assignments': [
|
160 |
+
{
|
161 |
+
'id': 4,
|
162 |
+
'title': 'Class Implementation Project',
|
163 |
+
'due_date': datetime.now() + timedelta(days=11),
|
164 |
+
'status': 'not_started'
|
165 |
+
}
|
166 |
+
]
|
167 |
+
}
|
168 |
+
},
|
169 |
+
{
|
170 |
+
'id': 5,
|
171 |
+
'course_id': 'CS103',
|
172 |
+
'title': 'File Handling and Exception Management',
|
173 |
+
'date': datetime.now() + timedelta(days=7),
|
174 |
+
'status': 'upcoming',
|
175 |
+
'pre_class': {
|
176 |
+
'resources': [
|
177 |
+
{'type': 'pdf', 'title': 'File Operations in Python', 'url': '/assets/file_ops.pdf'},
|
178 |
+
{'type': 'video', 'title': 'Exception Handling', 'duration': '20:00'}
|
179 |
+
],
|
180 |
+
'completion_required': True
|
181 |
+
},
|
182 |
+
'in_class': {
|
183 |
+
'topics': ['File Operations', 'Exception Handling', 'Context Managers'],
|
184 |
+
'quiz': {
|
185 |
+
'title': 'File Operations Quiz',
|
186 |
+
'questions': 8,
|
187 |
+
'duration': 20
|
188 |
+
},
|
189 |
+
'polls': [
|
190 |
+
{'question': 'How often do you handle exceptions in your code?',
|
191 |
+
'options': ['Always', 'Sometimes', 'Rarely', 'Never'],
|
192 |
+
'responses': {'Always': 10, 'Sometimes': 15, 'Rarely': 5}
|
193 |
+
}
|
194 |
+
]
|
195 |
+
},
|
196 |
+
'post_class': {
|
197 |
+
'assignments': [
|
198 |
+
{
|
199 |
+
'id': 5,
|
200 |
+
'title': 'File Processing Application',
|
201 |
+
'due_date': datetime.now() + timedelta(days=14),
|
202 |
+
'status': 'not_started'
|
203 |
+
}
|
204 |
+
]
|
205 |
+
}
|
206 |
+
}
|
207 |
+
]
|
208 |
+
|
209 |
+
# Chatbot message history
|
210 |
+
SAMPLE_CHAT_HISTORY = {
|
211 |
+
1: [
|
212 |
+
{'user': 'student1', 'message': 'What is the difference between list and tuple?', 'timestamp': datetime.now()},
|
213 |
+
{'user': 'chatbot', 'message': 'Lists are mutable (can be modified) while tuples are immutable (cannot be modified after creation).', 'timestamp': datetime.now()}
|
214 |
+
]
|
215 |
+
}
|
216 |
+
|
217 |
+
# Student progress data
|
218 |
+
SAMPLE_STUDENT_PROGRESS = {
|
219 |
+
'user1': {
|
220 |
+
1: {'pre_class': 50, 'in_class': 80, 'post_class': 90},
|
221 |
+
2: {'pre_class': 100, 'in_class': 75, 'post_class': 85},
|
222 |
+
3: {'pre_class': 50, 'in_class': 0, 'post_class': 0},
|
223 |
+
4: {'pre_class': 0, 'in_class': 0, 'post_class': 0},
|
224 |
+
5: {'pre_class': 0, 'in_class': 0, 'post_class': 0}
|
225 |
+
}
|
226 |
+
}
|
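Illustrative only: how a dashboard page might filter the sample data above; assumes the module is importable as utils.sample_data, matching the path in this commit.

from utils.sample_data import SAMPLE_SESSIONS, SAMPLE_STUDENT_PROGRESS

cs101_sessions = [s for s in SAMPLE_SESSIONS if s['course_id'] == 'CS101']
print([s['title'] for s in cs101_sessions])
print(SAMPLE_STUDENT_PROGRESS['user1'][1])  # demo progress for session id 1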