|
import json |
|
import typing_extensions as typing |
|
import google.generativeai as genai |
|
from typing import List, Dict
|
from collections import defaultdict |
|
|
|
from dotenv import load_dotenv |
|
import os |
|
|
|
|
load_dotenv() |
|
GEMINI_API_KEY = os.getenv('GEMINI_KEY') |
|
|
|
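# --- TypedDicts documenting the intended shape of the analytics payload ---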
class EngagementMetrics(typing.TypedDict): |
|
participation_level: str |
|
question_quality: str |
|
concept_understanding: str |
|
|
|
class StudentInsight(typing.TypedDict): |
|
student_id: str |
|
performance_level: str |
|
struggling_topics: list[str] |
|
engagement_metrics: EngagementMetrics |
|
|
|
class TopicInsight(typing.TypedDict): |
|
topic: str |
|
difficulty_level: float |
|
student_count: int |
|
common_issues: list[str] |
|
key_misconceptions: list[str] |
|
|
|
class RecommendedAction(typing.TypedDict): |
|
action: str |
|
priority: str |
|
target_group: str |
|
reasoning: str |
|
expected_impact: str |
|
|
|
class ClassDistribution(typing.TypedDict): |
|
high_performers: float |
|
average_performers: float |
|
at_risk: float |
|
|
|
class CourseHealth(typing.TypedDict): |
|
overall_engagement: float |
|
critical_topics: list[str] |
|
class_distribution: ClassDistribution |
|
|
|
class InterventionMetrics(typing.TypedDict): |
|
immediate_attention_needed: list[str] |
|
monitoring_required: list[str] |
|
|
|
class AnalyticsResponse(typing.TypedDict): |
|
topic_insights: list[TopicInsight] |
|
student_insights: list[StudentInsight] |
|
recommended_actions: list[RecommendedAction] |
|
course_health: CourseHealth |
|
intervention_metrics: InterventionMetrics |
|
|
class NovaScholarAnalytics: |
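    """Generates course analytics from student-AI tutor chat histories using Gemini."""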
|
def __init__(self, model_name: str = "gemini-1.5-flash"): |
|
genai.configure(api_key=GEMINI_API_KEY) |
|
self.model = genai.GenerativeModel(model_name) |
|
|
|
def _create_analytics_prompt(self, chat_histories: List[Dict], all_topics: List[str]) -> str: |
|
"""Creates a structured prompt for Gemini to analyze chat histories.""" |
|
return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics. |
|
Context: |
|
- Chat histories: {json.dumps(chat_histories, indent=2)} |
|
- These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery. |
|
- Topics covered: {', '.join(all_topics)}. |
|
|
|
Your task is to provide detailed analytics that will help faculty address challenges effectively and enhance learning outcomes. |
|
|
|
Output Format (strictly follow this JSON structure): |
|
{{ |
|
"topic_wise_insights": [ |
|
{{ |
|
"topic": "<string>", |
|
"struggling_percentage": <number between 0 and 1>, |
|
"key_issues": ["<string>", "<string>", ...], |
|
"key_misconceptions": ["<string>", "<string>", ...], |
|
"recommended_actions": {{ |
|
"description": "<string>", |
|
"priority": "high|medium|low", |
|
"expected_outcome": "<string>" |
|
}} |
|
}} |
|
], |
|
"ai_recommended_actions": [ |
|
{{ |
|
"action": "<string>", |
|
"priority": "high|medium|low", |
|
"reasoning": "<string>", |
|
"expected_outcome": "<string>", |
|
"pedagogy_recommendations": {{ |
|
"methods": ["<string>", "<string>", ...], |
|
"resources": ["<string>", "<string>", ...], |
|
"expected_impact": "<string>" |
|
}} |
|
}} |
|
], |
|
"student_analytics": [ |
|
{{ |
|
"student_id": "<string>", |
|
"engagement_metrics": {{ |
|
"participation_level": <number between 0 and 1>, |
|
"concept_understanding": "strong|moderate|needs_improvement", |
|
"question_quality": "advanced|intermediate|basic" |
|
}}, |
|
"struggling_topics": ["<string>", "<string>", ...], |
|
"personalized_recommendation": "<string>" |
|
}} |
|
] |
|
}} |
|
|
|
Guidelines for Analysis: |
|
- Focus on actionable and concise insights rather than exhaustive details. |
|
- Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty. |
|
- Prioritize topics with higher difficulty scores or more students struggling. |
|
- Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable. |
|
- Make sure to include **all** students in the analysis, not just a subset.
|
- For the ai_recommended_actions:
|
  - Prioritize pedagogy recommendations for critical topics with the highest difficulty scores or struggling percentages.
|
- For each action: |
|
    - Include specific teaching methods (e.g., interactive discussions, quizzes, problem-based learning, practical examples).
|
- Recommend supporting resources (e.g., videos, handouts, simulations). |
|
- Provide reasoning for the recommendation and the expected outcomes for student learning. |
|
- Example: |
|
- **Action:** Conduct an interactive problem-solving session on "<Topic Name>". |
|
- **Reasoning:** Students showed difficulty in applying concepts to practical problems. |
|
- **Expected Outcome:** Improved practical understanding and application of the topic. |
|
- **Pedagogy Recommendations:** |
|
- **Methods:** Group discussions, real-world case studies. |
|
- **Resources:** Online interactive tools, relevant case studies, video walkthroughs. |
|
- **Expected Impact:** Enhance conceptual clarity by 40% and practical application by 30%. |
|
|
|
The response must adhere strictly to the above JSON structure, with all fields populated appropriately.""" |
|
|
|
|
|
def _calculate_class_distribution(self, analytics: Dict) -> Dict: |
|
"""Calculate the distribution of students across performance levels.""" |
|
try: |
|
total_students = len(analytics.get("student_insights", [])) |
|
if total_students == 0: |
|
return { |
|
"high_performers": 0, |
|
"average_performers": 0, |
|
"at_risk": 0 |
|
} |
|
|
|
distribution = defaultdict(int) |
|
|
|
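            # Bucket each student by performance label, tolerating several spellings.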
for student in analytics.get("student_insights", []): |
|
performance_level = student.get("performance_level", "average") |
|
|
|
if performance_level in ["excellent", "high", "high_performer"]: |
|
distribution["high_performers"] += 1 |
|
elif performance_level in ["struggling", "low", "at_risk"]: |
|
distribution["at_risk"] += 1 |
|
else: |
|
distribution["average_performers"] += 1 |
|
|
|
|
|
            # Normalize counts to fractions, always emitting all three buckets.
            return {
                level: distribution[level] / total_students
                for level in ("high_performers", "average_performers", "at_risk")
            }
|
except Exception as e: |
|
print(f"Error calculating class distribution: {str(e)}") |
|
return { |
|
"high_performers": 0, |
|
"average_performers": 0, |
|
"at_risk": 0 |
|
} |
|
|
|
def _identify_urgent_cases(self, analytics: Dict) -> List[str]: |
|
"""Identify students needing immediate attention.""" |
|
try: |
|
urgent_cases = [] |
|
for student in analytics.get("student_insights", []): |
|
student_id = student.get("student_id") |
|
if not student_id: |
|
continue |
|
|
|
|
|
risk_factors = 0 |
|
|
|
|
|
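                # Signal 1: weak overall performance level.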
if student.get("performance_level") in ["struggling", "at_risk", "low"]: |
|
risk_factors += 1 |
|
|
|
|
|
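                # Signal 2: struggling in two or more topics.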
if len(student.get("struggling_topics", [])) >= 2: |
|
risk_factors += 1 |
|
|
|
|
|
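                # Signal 3: low participation or weak concept understanding.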
engagement = student.get("engagement_metrics", {}) |
|
if (engagement.get("participation_level") == "low" or |
|
engagement.get("concept_understanding") == "needs_improvement"): |
|
risk_factors += 1 |
|
|
|
|
|
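                # Two or more of the three signals marks the student as urgent.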
if risk_factors >= 2: |
|
urgent_cases.append(student_id) |
|
|
|
return urgent_cases |
|
except Exception as e: |
|
print(f"Error identifying urgent cases: {str(e)}") |
|
return [] |
|
|
|
def _identify_monitoring_cases(self, analytics: Dict) -> List[str]: |
|
"""Identify students who need monitoring but aren't urgent cases.""" |
|
try: |
|
monitoring_cases = [] |
|
urgent_cases = set(self._identify_urgent_cases(analytics)) |
|
|
|
for student in analytics.get("student_insights", []): |
|
student_id = student.get("student_id") |
|
if not student_id or student_id in urgent_cases: |
|
continue |
|
|
|
|
|
monitoring_needed = False |
|
|
|
|
|
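                # Watch-list criteria: one struggling topic, medium participation, or average performance.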
if len(student.get("struggling_topics", [])) == 1: |
|
monitoring_needed = True |
|
|
|
|
|
engagement = student.get("engagement_metrics", {}) |
|
if engagement.get("participation_level") == "medium": |
|
monitoring_needed = True |
|
|
|
|
|
if student.get("performance_level") == "average": |
|
monitoring_needed = True |
|
|
|
if monitoring_needed: |
|
monitoring_cases.append(student_id) |
|
|
|
return monitoring_cases |
|
except Exception as e: |
|
print(f"Error identifying monitoring cases: {str(e)}") |
|
return [] |
|
|
|
def _identify_critical_topics(self, analytics: Dict) -> List[str]: |
|
""" |
|
Identify critical topics that need attention based on multiple factors. |
|
Returns a list of topic names that are considered critical. |
|
""" |
|
try: |
|
critical_topics = [] |
|
topics = analytics.get("topic_insights", []) |
|
|
|
for topic in topics: |
|
if not isinstance(topic, dict): |
|
continue |
|
|
|
|
|
critical_score = 0 |
|
|
|
|
|
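                # Difficulty weighs most: above 0.7 scores 2 points, above 0.5 scores 1.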
difficulty_level = topic.get("difficulty_level", 0) |
|
if difficulty_level > 0.7: |
|
critical_score += 2 |
|
elif difficulty_level > 0.5: |
|
critical_score += 1 |
|
|
|
|
|
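                # Share of the class struggling: above 50% scores 2 points, above 30% scores 1.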
student_count = topic.get("student_count", 0) |
|
total_students = len(analytics.get("student_insights", [])) |
|
if total_students > 0: |
|
struggle_ratio = student_count / total_students |
|
if struggle_ratio > 0.5: |
|
critical_score += 2 |
|
elif struggle_ratio > 0.3: |
|
critical_score += 1 |
|
|
|
|
|
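                # Breadth of reported issues and misconceptions each add a point.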
if len(topic.get("common_issues", [])) > 2: |
|
critical_score += 1 |
|
|
|
|
|
if len(topic.get("key_misconceptions", [])) > 1: |
|
critical_score += 1 |
|
|
|
|
|
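                # A combined score of 3 or more flags the topic as critical.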
if critical_score >= 3: |
|
critical_topics.append(topic.get("topic", "Unknown Topic")) |
|
|
|
return critical_topics |
|
|
|
except Exception as e: |
|
print(f"Error identifying critical topics: {str(e)}") |
|
return [] |
|
|
|
def _calculate_engagement(self, analytics: Dict) -> Dict: |
|
""" |
|
Calculate detailed engagement metrics across all students. |
|
Returns a dictionary with engagement statistics. |
|
""" |
|
try: |
|
total_students = len(analytics.get("student_insights", [])) |
|
if total_students == 0: |
|
return { |
|
"total_students": 0, |
|
"overall_score": 0, |
|
"engagement_distribution": { |
|
"high": 0, |
|
"medium": 0, |
|
"low": 0 |
|
}, |
|
"participation_metrics": { |
|
"average_topics_per_student": 0, |
|
"active_participants": 0 |
|
} |
|
} |
|
|
|
engagement_levels = defaultdict(int) |
|
total_topics_engaged = 0 |
|
active_participants = 0 |
|
|
|
for student in analytics.get("student_insights", []): |
|
|
|
metrics = student.get("engagement_metrics", {}) |
|
|
|
|
|
participation = metrics.get("participation_level", "low").lower() |
|
engagement_levels[participation] += 1 |
|
|
|
|
|
topics_count = len(student.get("struggling_topics", [])) |
|
total_topics_engaged += topics_count |
|
|
|
|
|
if topics_count > 0: |
|
active_participants += 1 |
|
|
|
|
|
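            # Heuristic engagement weights: high = 1.0, medium = 0.6, low = 0.2.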
weighted_score = ( |
|
(engagement_levels["high"] * 1.0 + |
|
engagement_levels["medium"] * 0.6 + |
|
engagement_levels["low"] * 0.2) / total_students |
|
) |
|
|
|
return { |
|
"total_students": total_students, |
|
"overall_score": round(weighted_score, 2), |
|
"engagement_distribution": { |
|
level: count/total_students |
|
for level, count in engagement_levels.items() |
|
}, |
|
"participation_metrics": { |
|
"average_topics_per_student": round(total_topics_engaged / total_students, 2), |
|
"active_participants_ratio": round(active_participants / total_students, 2) |
|
} |
|
} |
|
|
|
except Exception as e: |
|
print(f"Error calculating engagement: {str(e)}") |
|
return { |
|
"total_students": 0, |
|
"overall_score": 0, |
|
"engagement_distribution": { |
|
"high": 0, |
|
"medium": 0, |
|
"low": 0 |
|
}, |
|
"participation_metrics": { |
|
"average_topics_per_student": 0, |
|
"active_participants_ratio": 0 |
|
} |
|
} |
|
|
|
def _process_gemini_response(self, response: str) -> Dict: |
|
"""Process and validate Gemini's response.""" |
|
try: |
|
|
|
analytics = json.loads(response) |
|
|
|
|
|
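            # Backfill missing or empty top-level fields with safe defaults.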
required_fields = { |
|
"topic_insights": [], |
|
"student_insights": [], |
|
"recommended_actions": [] |
|
} |
|
|
|
|
|
for field, default_value in required_fields.items(): |
|
if field not in analytics or not analytics[field]: |
|
analytics[field] = default_value |
|
|
|
|
|
return self._enrich_analytics(analytics) |
|
|
|
except (json.JSONDecodeError, KeyError, TypeError) as e: |
|
print(f"Error processing Gemini response: {str(e)}") |
|
print(f"Raw response: {response}") |
|
return self._fallback_analytics() |
|
|
|
def _enrich_analytics(self, analytics: Dict) -> Dict: |
|
"""Add derived insights and metrics to the analytics.""" |
|
|
|
analytics["course_health"] = { |
|
"overall_engagement": self._calculate_engagement(analytics), |
|
"critical_topics": self._identify_critical_topics(analytics), |
|
"class_distribution": self._calculate_class_distribution(analytics) |
|
} |
|
|
|
|
|
analytics["intervention_metrics"] = { |
|
"immediate_attention_needed": self._identify_urgent_cases(analytics), |
|
"monitoring_required": self._identify_monitoring_cases(analytics) |
|
} |
|
|
|
return analytics |
|
|
|
|
def generate_analytics(self, chat_histories: List[Dict], all_topics: List[str]) -> Dict: |
|
"""Main method to generate analytics with better error handling.""" |
|
try: |
|
|
|
print("Input validation:") |
|
print(f"Chat histories: {len(chat_histories)} entries") |
|
print(f"Topics: {all_topics}") |
|
|
|
if not chat_histories or not all_topics: |
|
print("Missing required input data") |
|
return self._fallback_analytics() |
|
|
|
|
|
try: |
|
processed_histories = self._preprocess_chat_histories(chat_histories) |
|
print("Successfully preprocessed chat histories") |
|
except Exception as preprocess_error: |
|
print(f"Error in preprocessing: {str(preprocess_error)}") |
|
return self._fallback_analytics() |
|
|
|
|
|
try: |
|
prompt = self._create_analytics_prompt(processed_histories, all_topics) |
|
print("Successfully created prompt") |
|
print("Prompt preview:", prompt[:200] + "...") |
|
except Exception as prompt_error: |
|
print(f"Error in prompt creation: {str(prompt_error)}") |
|
return self._fallback_analytics() |
|
|
|
|
|
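            # Request JSON output directly from Gemini; a low temperature keeps the analytics more reproducible.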
response = self.model.generate_content( |
|
prompt, |
|
generation_config=genai.GenerationConfig( |
|
response_mime_type="application/json", |
|
temperature=0.15 |
|
) |
|
) |
|
|
|
if not response.text: |
|
print("Empty response from Gemini") |
|
return self._fallback_analytics() |
|
|
|
analytics = json.loads(response.text) |
|
return analytics |
|
|
|
except Exception as e: |
|
print(f"Error generating analytics: {str(e)}") |
|
print(f"Error type: {type(e)}") |
|
import traceback |
|
print("Full traceback:", traceback.format_exc()) |
|
return self._fallback_analytics() |
|
|
|
def _preprocess_chat_histories(self, chat_histories: List[Dict]) -> List[Dict]: |
|
"""Preprocess chat histories to focus on relevant information.""" |
|
processed = [] |
|
|
|
for chat in chat_histories: |
|
|
|
            # user_id may be a plain string or a Mongo extended-JSON ObjectId
            # ({"$oid": "..."}) depending on how the history was exported.
            raw_id = chat.get("user_id", "unknown")
            user_id = str(raw_id["$oid"]) if isinstance(raw_id, dict) and "$oid" in raw_id else str(raw_id)
|
|
|
try: |
|
processed_chat = { |
|
"user_id": user_id, |
|
"messages": [ |
|
{ |
|
"prompt": msg["prompt"], |
|
"response": msg["response"] |
|
} |
|
for msg in chat["messages"] |
|
] |
|
} |
|
processed.append(processed_chat) |
|
print(f"Successfully processed chat for user: {user_id}") |
|
except Exception as e: |
|
print(f"Error processing chat for user: {user_id}") |
|
print(f"Error details: {str(e)}") |
|
continue |
|
|
|
return processed |
|
|
|
def _fallback_analytics(self) -> Dict: |
|
"""Provide comprehensive fallback analytics that match our schema.""" |
|
return { |
|
"topic_insights": [], |
|
"student_insights": [], |
|
"recommended_actions": [ |
|
{ |
|
"action": "Review analytics generation process", |
|
"priority": "high", |
|
"target_group": "system_administrators", |
|
"reasoning": "Analytics generation failed", |
|
"expected_impact": "Restore analytics functionality" |
|
} |
|
], |
|
"course_health": { |
|
"overall_engagement": 0, |
|
"critical_topics": [], |
|
"class_distribution": { |
|
"high_performers": 0, |
|
"average_performers": 0, |
|
"at_risk": 0 |
|
} |
|
}, |
|
"intervention_metrics": { |
|
"immediate_attention_needed": [], |
|
"monitoring_required": [] |
|
} |
|
} |
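

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): demonstrates the chat-history shape that
# generate_analytics expects. The student ID, messages, and topics below are
# invented, and a real run requires GEMINI_KEY to be set in the environment
# (e.g., via a .env file).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    sample_histories = [
        {
            # user_id may also arrive as {"$oid": "..."} from a MongoDB export
            "user_id": "student_001",
            "messages": [
                {
                    "prompt": "Can you explain gradient descent again?",
                    "response": "Gradient descent iteratively updates parameters to minimize a loss function..."
                }
            ]
        }
    ]
    sample_topics = ["Gradient Descent", "Backpropagation"]

    analyzer = NovaScholarAnalytics()
    results = analyzer.generate_analytics(sample_histories, sample_topics)
    print(json.dumps(results, indent=2))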
|