Final_project / app.py
Dannyar608's picture
Update app.py
463aced verified
raw
history blame
19.3 kB
import gradio as gr
import pandas as pd
import json
import os
import re
from PyPDF2 import PdfReader
from collections import defaultdict
# ========== TRANSCRIPT PARSING FUNCTIONS ==========
def extract_courses_with_grade_levels(text):
    """Extract courses (and optional grades) from transcript text, grouped by grade level.

    Each line is scanned for either a course code (two or more capitals
    followed by three digits, e.g. ``MATH 101``) or a title-case course name
    (e.g. ``English Literature``), optionally followed on the same line by a
    letter grade (``A``-``F`` with optional ``+``/``-``) or a percentage.
    A ``Grade``/``Year`` header immediately before a course switches the grade
    level that subsequent courses are filed under.

    Args:
        text: Raw transcript text.

    Returns:
        A dict mapping grade level (str) to a list of
        ``{"course": ..., "grade": ...}`` dicts; ``"grade"`` is present only
        when a grade was found. Courses seen before any header are filed under
        the first grade level found anywhere in the text, or ``"Unknown"``.
    """
    grade_level_pattern = r"(Grade|Year)\s*[:]?\s*(\d+|Freshman|Sophomore|Junior|Senior)"
    grade_match = re.search(grade_level_pattern, text, re.IGNORECASE)
    current_grade = grade_match.group(2) if grade_match else "Unknown"

    # Inside the course name and between the course and its grade only
    # horizontal whitespace ([ \t]) is allowed. Using \s there lets a match run
    # across line breaks, which merges consecutive course lines into one entry
    # and can take the first letter of the next line as a grade.
    course_pattern = r"""
        (?:^|\n)
        (?: (Grade|Year)\s*[:]?\s*(\d+|Freshman|Sophomore|Junior|Senior)\s*[\n-]* )?
        (
            (?:[A-Z]{2,}[ \t]?\d{3})
            |
            [A-Z][a-z]+(?:[ \t][A-Z][a-z]+)*
        )
        [ \t]*
        (?: [:\-]?[ \t]* ([A-F][+-]?(?![A-Za-z])|\d{2,3}%) )?
    """

    courses_by_grade = defaultdict(list)
    for match in re.finditer(course_pattern, text, re.VERBOSE | re.MULTILINE):
        grade_context, grade_level, course, grade = match.groups()
        if grade_context:
            current_grade = grade_level
        if not course:
            continue
        course_info = {"course": course.strip()}
        if grade:
            course_info["grade"] = grade.strip()
        courses_by_grade[current_grade].append(course_info)
    return dict(courses_by_grade)
# Shared tail of every GPA pattern: optional separators, then a value like 3.8 or 3.85.
_GPA_VALUE = r'[\s:]*(\d\.\d{1,2})'
# The leading \b keeps "Weighted GPA" from matching inside "Unweighted GPA".
_WEIGHTED_GPA_PATTERNS = (
    r'\bWeighted GPA' + _GPA_VALUE,
    r'GPA \(Weighted\)' + _GPA_VALUE,
    r'Cumulative GPA \(Weighted\)' + _GPA_VALUE,
)
_UNWEIGHTED_GPA_PATTERNS = (
    r'Unweighted GPA' + _GPA_VALUE,
    r'GPA \(Unweighted\)' + _GPA_VALUE,
    r'Cumulative GPA \(Unweighted\)' + _GPA_VALUE,
)
_GENERIC_GPA_PATTERN = r'GPA' + _GPA_VALUE


def extract_gpa_data(text):
    """Find weighted and unweighted GPA values in transcript text.

    Explicitly labelled values win; when a label occurs several times the last
    occurrence is kept. An unlabelled ``GPA: x.xx`` fills whichever of the two
    slots is still ``"N/A"``, but a value that already carried an explicit
    label is never reused for the other slot.

    Args:
        text: Raw transcript text.

    Returns:
        ``{"weighted": str, "unweighted": str}`` with ``"N/A"`` for anything
        not found.
    """
    gpa_data = {'weighted': "N/A", 'unweighted': "N/A"}
    labelled_spans = set()
    labelled_groups = (
        ('weighted', _WEIGHTED_GPA_PATTERNS),
        ('unweighted', _UNWEIGHTED_GPA_PATTERNS),
    )
    for kind, patterns in labelled_groups:
        for pattern in patterns:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                gpa_data[kind] = match.group(1)
                labelled_spans.add(match.span(1))

    for match in re.finditer(_GENERIC_GPA_PATTERN, text, re.IGNORECASE):
        # The generic pattern also hits "Weighted GPA: 4.2"; skip numbers that
        # were already claimed by a labelled pattern.
        if match.span(1) in labelled_spans:
            continue
        for kind in ('unweighted', 'weighted'):
            if gpa_data[kind] == "N/A":
                gpa_data[kind] = match.group(1)
    return gpa_data


def _to_native(value):
    """Convert a NumPy scalar to the matching built-in value (JSON-serialisable)."""
    return value.item() if hasattr(value, "item") else value


def _first_matching_column(df, candidates):
    """Return the first name in *candidates* that is a column of *df*, else None."""
    return next((col for col in candidates if col in df.columns), None)


def _parse_pdf_transcript(file):
    """Extract grade level, GPA and courses from a PDF transcript."""
    reader = PdfReader(file)
    page_texts = (page.extract_text() for page in reader.pages)
    text = ''.join(f"{page_text}\n" for page_text in page_texts if page_text)

    grade_match = re.search(
        r'(Grade|Year)[\s:]*(\d+|Freshman|Sophomore|Junior|Senior)', text, re.IGNORECASE
    )
    grade_level = grade_match.group(2) if grade_match else "Unknown"
    gpa_data = extract_gpa_data(text)
    courses_by_grade = extract_courses_with_grade_levels(text)

    output_text = f"Grade Level: {grade_level}\n\n"
    if gpa_data['weighted'] != "N/A" or gpa_data['unweighted'] != "N/A":
        output_text += "GPA Information:\n"
        if gpa_data['unweighted'] != "N/A":
            output_text += f"- Unweighted GPA: {gpa_data['unweighted']}\n"
        if gpa_data['weighted'] != "N/A":
            output_text += f"- Weighted GPA: {gpa_data['weighted']}\n"
    else:
        output_text += "No GPA information found\n"
    output_text += "\n(Courses not shown here)"
    return output_text, {
        "gpa": gpa_data,
        "grade_level": grade_level,
        "courses": courses_by_grade
    }


def _summarize_table(df):
    """Build the summary text and data dict from a CSV/XLSX transcript table."""
    has_rows = not df.empty

    gpa = "N/A"
    gpa_col = _first_matching_column(df, ['GPA', 'Grade Point Average', 'Cumulative GPA'])
    if gpa_col is not None and has_rows:
        value = _to_native(df[gpa_col].iloc[0])
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if is_number and not pd.isna(value):
            gpa = value

    grade_level = "N/A"
    level_col = _first_matching_column(df, ['Grade Level', 'Grade', 'Class', 'Year'])
    if level_col is not None and has_rows:
        grade_level = _to_native(df[level_col].iloc[0])

    courses = []
    course_col = _first_matching_column(df, ['Course', 'Subject', 'Course Name', 'Class'])
    if course_col is not None:
        courses = df[course_col].tolist()

    output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses:\n"
    output_text += "\n".join(f"- {course}" for course in courses)
    return output_text, {
        "gpa": {"unweighted": gpa, "weighted": "N/A"},
        "grade_level": grade_level,
        "courses": courses
    }


def parse_transcript(file):
    """Parse an uploaded transcript (CSV, XLSX or PDF).

    Args:
        file: A path string, or a file-like object with a ``name`` attribute,
            or ``None`` when no file is selected.

    Returns:
        ``(summary_text, data)`` where ``data`` is a dict with keys ``"gpa"``
        (dict with ``"weighted"``/``"unweighted"``), ``"grade_level"`` and
        ``"courses"``. ``data`` is ``None`` when nothing could be parsed (no
        file, or an unsupported extension).
    """
    if file is None:
        return "No file uploaded", None

    # Accept both a plain path and an object exposing ``.name``.
    file_name = file if isinstance(file, str) else getattr(file, "name", "")
    extension = os.path.splitext(str(file_name))[1].lower()

    if extension == '.pdf':
        return _parse_pdf_transcript(file)
    if extension == '.csv':
        df = pd.read_csv(file)
    elif extension == '.xlsx':
        df = pd.read_excel(file)
    else:
        return "Unsupported file format", None
    return _summarize_table(df)
# ========== LEARNING STYLE QUIZ ==========
# Prompts for the 20-question learning-style quiz, in display order.
# Index-aligned with learning_style_options: entry i here is answered with
# one of the four choices in learning_style_options[i].
learning_style_questions = [
"When you study for a test, you prefer to:",
"When you need directions to a new place, you prefer:",
"When you learn a new skill, you prefer to:",
"When you're trying to concentrate, you:",
"When you meet new people, you remember them by:",
"When you're relaxing, you prefer to:",
"When you're explaining something to someone, you:",
"When you're trying to remember something, you:",
"When you're in a classroom, you learn best when:",
"When you're trying to solve a problem, you:",
"When you're taking notes, you:",
"When you're learning new software, you prefer to:",
"When you're at a museum, you spend the most time:",
"When you're assembling furniture, you:",
"When you're learning new vocabulary, you:",
"When you're giving a presentation, you prefer:",
"When you're at a party, you enjoy:",
"When you're taking a break from studying, you:",
"When you're learning dance moves, you:",
"When you're choosing a book, you prefer:"
]
# Four answer choices per question. Every choice ends with the learning style
# it represents in parentheses: (Visual), (Auditory), (Reading/Writing) or
# (Kinesthetic).
# NOTE: the position of a given style is NOT the same in every row (several
# rows start with a Visual or Auditory choice), so the style has to be read
# from the label rather than inferred from the choice's index.
learning_style_options = [
["Read the textbook (Reading/Writing)", "Listen to lectures (Auditory)", "Use diagrams/charts (Visual)", "Practice problems (Kinesthetic)"],
["Look at a map (Visual)", "Have someone tell you (Auditory)", "Write down directions (Reading/Writing)", "Try walking/driving there (Kinesthetic)"],
["Read instructions (Reading/Writing)", "Have someone show you (Visual)", "Listen to explanations (Auditory)", "Try it yourself (Kinesthetic)"],
["Need quiet (Reading/Writing)", "Need background noise (Auditory)", "Need to move around (Kinesthetic)", "Need visual stimulation (Visual)"],
["Their face (Visual)", "Their name (Auditory)", "What you talked about (Reading/Writing)", "What you did together (Kinesthetic)"],
["Read (Reading/Writing)", "Listen to music (Auditory)", "Watch TV (Visual)", "Do something active (Kinesthetic)"],
["Write it down (Reading/Writing)", "Tell them verbally (Auditory)", "Show them (Visual)", "Demonstrate physically (Kinesthetic)"],
["See it written down (Visual)", "Say it out loud (Auditory)", "Write it down (Reading/Writing)", "Do it physically (Kinesthetic)"],
["Reading materials (Reading/Writing)", "Listening to lectures (Auditory)", "Seeing diagrams (Visual)", "Doing hands-on activities (Kinesthetic)"],
["Write down steps (Reading/Writing)", "Talk through it (Auditory)", "Draw diagrams (Visual)", "Try different approaches (Kinesthetic)"],
["Write detailed notes (Reading/Writing)", "Record lectures (Auditory)", "Draw mind maps (Visual)", "Take minimal notes (Kinesthetic)"],
["Read the manual (Reading/Writing)", "Have someone explain it (Auditory)", "Watch tutorial videos (Visual)", "Just start using it (Kinesthetic)"],
["Reading descriptions (Reading/Writing)", "Listening to audio guides (Auditory)", "Looking at exhibits (Visual)", "Interactive displays (Kinesthetic)"],
["Read instructions first (Reading/Writing)", "Ask someone to help (Auditory)", "Look at diagrams (Visual)", "Start assembling (Kinesthetic)"],
["Write them repeatedly (Reading/Writing)", "Say them repeatedly (Auditory)", "Use flashcards (Visual)", "Use them in conversation (Kinesthetic)"],
["Having detailed notes (Reading/Writing)", "Speaking freely (Auditory)", "Using visual aids (Visual)", "Demonstrating something (Kinesthetic)"],
["Conversations (Auditory)", "People-watching (Visual)", "Dancing/games (Kinesthetic)", "Reading about people (Reading/Writing)"],
["Read for fun (Reading/Writing)", "Listen to music (Auditory)", "Watch videos (Visual)", "Exercise (Kinesthetic)"],
["Watch demonstrations (Visual)", "Listen to instructions (Auditory)", "Read choreography (Reading/Writing)", "Try the moves (Kinesthetic)"],
["Text-heavy books (Reading/Writing)", "Audiobooks (Auditory)", "Books with pictures (Visual)", "Interactive books (Kinesthetic)"]
]
def learning_style_quiz(*answers):
    """Score learning-style quiz answers and describe the dominant style(s).

    The style of each answer is read from the parenthesised label at the end
    of the selected option text (e.g. ``"Look at a map (Visual)"``), so the
    result does not depend on where that option sits in its question's list.
    Unanswered questions (``None`` or empty) and answers without a recognised
    label are ignored.

    Args:
        *answers: The selected option text for each question, in order.

    Returns:
        A multi-line result string naming the dominant style(s) followed by a
        per-style score breakdown out of the number of questions passed in, or
        a prompt to answer the quiz when no usable answer was given.
    """
    scores = {
        "Visual": 0,
        "Auditory": 0,
        "Reading/Writing": 0,
        "Kinesthetic": 0
    }
    answered = 0
    for answer in answers:
        if not answer:
            continue  # radio left unselected
        label = re.search(r"\(([^()]*)\)\s*$", str(answer))
        if label and label.group(1) in scores:
            scores[label.group(1)] += 1
            answered += 1

    if answered == 0:
        # Without this guard every style ties at 0 and all four would be
        # reported as "strong".
        return "Please answer at least one question to see your learning style."

    max_score = max(scores.values())
    dominant_styles = [style for style, score in scores.items() if score == max_score]
    if len(dominant_styles) == 1:
        result = f"Your primary learning style is: {dominant_styles[0]}"
    else:
        result = f"You have multiple strong learning styles: {', '.join(dominant_styles)}"

    total_questions = len(answers)
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    result += "\n\nDetailed Scores:\n"
    result += "".join(f"{style}: {score}/{total_questions}\n" for style, score in ranked)
    return result
# ========== SAVE STUDENT PROFILE FUNCTION ==========
def save_profile(name, age, interests, transcript, learning_style, favorites, blog):
    """Persist a student profile as JSON and return a Markdown summary of it.

    The profile is written to ``student_profiles/<name>_profile.json``. The
    name is reduced to letters, digits, ``_`` and ``-`` before it is used in
    the path, so a name containing path separators or dots cannot write
    outside the profile directory; an empty name falls back to ``student``.

    Args:
        name: Student name (also used to derive the file name).
        age: Student age.
        interests: Free-text interests.
        transcript: Parsed transcript data, passed to ``transcript_display``.
        learning_style: Learning-style quiz result text.
        favorites: Dict with keys ``movie``, ``show``, ``book``, ``character``
            and the matching ``*_reason`` keys.
        blog: Optional blog text.

    Returns:
        The Markdown summary string.
    """
    data = {
        "name": name,
        "age": age,
        "interests": interests,
        "transcript": transcript,
        "learning_style": learning_style,
        "favorites": favorites,
        "blog": blog
    }
    os.makedirs("student_profiles", exist_ok=True)
    # Whitespace becomes "_" as before; anything else unsafe for a file name
    # is collapsed to "_" as well.
    safe_name = re.sub(r"[^\w\-]+", "_", (name or "").strip()).strip("_") or "student"
    json_path = os.path.join("student_profiles", f"{safe_name}_profile.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    summary_lines = [
        f"### Student Profile: {name}",
        f"**Age:** {age}",
        f"**Interests:** {interests}",
        f"**Learning Style:** {learning_style}",
        "#### Transcript:",
        f"{transcript_display(transcript)}",
        "#### Favorites:",
        f"- Movie: {favorites['movie']} ({favorites['movie_reason']})",
        f"- Show: {favorites['show']} ({favorites['show_reason']})",
        f"- Book: {favorites['book']} ({favorites['book_reason']})",
        f"- Character: {favorites['character']} ({favorites['character_reason']})",
        "#### Blog:",
        f"{blog if blog else '_No blog provided_'}",
    ]
    return "\n".join(summary_lines) + "\n"
def _is_courses_by_grade(value):
    """Return True for a non-empty dict whose values are all lists."""
    return (
        isinstance(value, dict)
        and bool(value)
        and all(isinstance(v, list) for v in value.values())
    )


def _format_course(course):
    """Render one course entry (dict with 'course'/'grade', or a plain value) as a bullet."""
    if isinstance(course, dict):
        line = f"- {course.get('course', 'Unknown course')}"
        if 'grade' in course:
            line += f" (Grade: {course['grade']})"
        return line
    return f"- {course}"


def _format_courses_by_grade(courses_by_grade):
    """Render a {grade_level: [course, ...]} mapping as Markdown sections."""
    display = ""
    for grade_level, courses in courses_by_grade.items():
        display += f"\n**Grade {grade_level}**\n"
        display += "".join(f"{_format_course(course)}\n" for course in courses)
    return display


def _format_gpa(gpa):
    """Render a GPA value; a weighted/unweighted dict is spelled out instead of repr'd."""
    if isinstance(gpa, dict):
        return (
            f"Unweighted {gpa.get('unweighted', 'N/A')}, "
            f"Weighted {gpa.get('weighted', 'N/A')}"
        )
    return f"{gpa}"


def transcript_display(transcript_dict):
    """Render transcript data as Markdown text.

    Accepts either:
      * a mapping of grade level to a list of course entries, or
      * a dict with ``"courses"``, ``"grade_level"`` and ``"gpa"`` keys, where
        ``"courses"`` is a flat list or a grade-level mapping and ``"gpa"`` is
        a single value or a ``{"weighted", "unweighted"}`` dict.

    Args:
        transcript_dict: Transcript data in one of the shapes above, or a
            falsy value when no transcript was uploaded.

    Returns:
        The formatted text; ``"No transcript uploaded."`` for falsy input.
    """
    if not transcript_dict:
        return "No transcript uploaded."
    if not isinstance(transcript_dict, dict):
        return f"{transcript_dict}"
    if _is_courses_by_grade(transcript_dict):
        return _format_courses_by_grade(transcript_dict)

    courses = transcript_dict.get("courses") or []
    if _is_courses_by_grade(courses):
        # Iterating the mapping directly would list only the grade keys.
        lines = [_format_courses_by_grade(courses).strip("\n")]
    else:
        lines = [_format_course(course) for course in courses]
    lines.append(f"Grade Level: {transcript_dict.get('grade_level', 'N/A')}")
    lines.append(f"GPA: {_format_gpa(transcript_dict.get('gpa', 'N/A'))}")
    return "\n".join(lines)
# ========== GRADIO INTERFACE ==========
with gr.Blocks() as app:
    # Step 1: upload a transcript; the parsed summary goes to a textbox and
    # the structured data is kept in session state for the save step.
    with gr.Tab("Step 1: Upload Transcript"):
        transcript_file = gr.File(label="Upload your transcript (CSV, Excel, or PDF)")
        transcript_output = gr.Textbox(label="Transcript Output")
        transcript_data = gr.State()
        transcript_file.change(
            fn=parse_transcript,
            inputs=transcript_file,
            outputs=[transcript_output, transcript_data],
        )

    # Step 2: one radio group per quiz question, numbered from 1.
    with gr.Tab("Step 2: Learning Style Quiz"):
        gr.Markdown("### Complete this 20-question quiz to determine your learning style")
        quiz_components = [
            gr.Radio(choices=choices, label=f"{number}. {prompt}")
            for number, (prompt, choices) in enumerate(
                zip(learning_style_questions, learning_style_options), start=1
            )
        ]
        learning_output = gr.Textbox(label="Learning Style Result", lines=5)
        submit_quiz_btn = gr.Button("Submit Quiz")
        submit_quiz_btn.click(
            learning_style_quiz,
            inputs=quiz_components,
            outputs=learning_output,
        )

    # Step 3: free-text personal questions and an optional blog entry.
    with gr.Tab("Step 3: Personal Questions"):
        name = gr.Textbox(label="What's your name?")
        age = gr.Number(label="How old are you?")
        interests = gr.Textbox(label="What are your interests?")
        movie = gr.Textbox(label="Favorite movie?")
        movie_reason = gr.Textbox(label="Why do you like that movie?")
        show = gr.Textbox(label="Favorite TV show?")
        show_reason = gr.Textbox(label="Why do you like that show?")
        book = gr.Textbox(label="Favorite book?")
        book_reason = gr.Textbox(label="Why do you like that book?")
        character = gr.Textbox(label="Favorite character?")
        character_reason = gr.Textbox(label="Why do you like that character?")
        blog_checkbox = gr.Checkbox(label="Do you want to write a blog?", value=False)
        blog_text = gr.Textbox(label="Write your blog here", visible=False, lines=5)
        # The blog box is shown only while the checkbox is ticked.
        blog_checkbox.change(
            fn=lambda wants_blog: gr.update(visible=wants_blog),
            inputs=blog_checkbox,
            outputs=blog_text,
        )

    # Step 4: collect everything, save it, and show the Markdown summary.
    with gr.Tab("Step 4: Save & Review"):
        output_summary = gr.Markdown()
        save_btn = gr.Button("Save Profile")

        def gather_and_save(name, age, interests, movie, movie_reason, show, show_reason,
                            book, book_reason, character, character_reason, blog, transcript, learning_style):
            """Bundle the favorites into a dict and delegate to save_profile."""
            favorite_keys = (
                "movie", "movie_reason",
                "show", "show_reason",
                "book", "book_reason",
                "character", "character_reason",
            )
            favorite_values = (
                movie, movie_reason,
                show, show_reason,
                book, book_reason,
                character, character_reason,
            )
            favorites = dict(zip(favorite_keys, favorite_values))
            return save_profile(name, age, interests, transcript, learning_style, favorites, blog)

        profile_inputs = [
            name, age, interests,
            movie, movie_reason,
            show, show_reason,
            book, book_reason,
            character, character_reason,
            blog_text,
            transcript_data, learning_output,
        ]
        save_btn.click(fn=gather_and_save, inputs=profile_inputs, outputs=output_summary)
# Add these new imports at the top
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from openai import OpenAI # Make sure to install with pip install openai
# ========== AI CHATBOT SETUP ==========
# DeepSeek causal LM and tokenizer used by retrieve_information_with_deepseek.
# Both are loaded once at import time; the model weights are loaded in float16.
# NOTE(review): confirm that this repository id exists on the Hugging Face Hub
# (published variants may carry a "-base" / "-chat" suffix).
deepseek_model_name = "deepseek-ai/deepseek-llm-7b"
deepseek_tokenizer = AutoTokenizer.from_pretrained(deepseek_model_name)
deepseek_model = AutoModelForCausalLM.from_pretrained(deepseek_model_name, torch_dtype=torch.float16)
# OpenAI client used by generate_chat_response_with_chatgpt.
# The key is read from the OPENAI_API_KEY environment variable so that a real
# secret never has to be written into this file. The placeholder fallback only
# keeps the module importable when the variable is unset; requests made with
# it will be rejected by the API.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-openai-api-key"))
def retrieve_information_with_deepseek(query, student_profile):
    """Ask the DeepSeek model to answer *query* in the context of a student profile.

    Args:
        query: The user's question.
        student_profile: Dict that may contain ``name``, ``age``,
            ``transcript`` (dict with ``grade_level`` and ``gpa``),
            ``learning_style`` and ``interests``. Missing or ``None`` entries
            (including a ``None`` profile) are reported as ``N/A``.

    Returns:
        The text generated by the model, without the prompt that was fed in.
    """
    profile = student_profile if isinstance(student_profile, dict) else {}
    transcript = profile.get('transcript')
    if not isinstance(transcript, dict):
        transcript = {}
    gpa = transcript.get('gpa')
    if isinstance(gpa, dict):
        unweighted_gpa = gpa.get('unweighted', 'N/A')
    else:
        unweighted_gpa = 'N/A' if gpa is None else gpa

    profile_context = (
        "\n"
        "Student Profile:\n"
        f"Name: {profile.get('name', 'N/A')}\n"
        f"Age: {profile.get('age', 'N/A')}\n"
        f"Grade Level: {transcript.get('grade_level', 'N/A')}\n"
        f"GPA: {unweighted_gpa} (Unweighted)\n"
        f"Learning Style: {profile.get('learning_style', 'N/A')}\n"
        f"Interests: {profile.get('interests', 'N/A')}\n"
    )
    prompt = (
        "\n"
        "[CONTEXT]\n"
        f"{profile_context}\n"
        "[QUERY]\n"
        f"{query}\n"
        "Based on the student profile and educational context, provide the most accurate and relevant information to answer the query.\n"
    )

    # Tokenized inputs must live on the same device as the model weights.
    inputs = deepseek_tokenizer(prompt, return_tensors="pt").to(deepseek_model.device)
    outputs = deepseek_model.generate(**inputs, max_new_tokens=200)
    # For a causal LM the returned sequence starts with the prompt tokens;
    # keep only the newly generated continuation.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    return deepseek_tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
def _format_chat_history(history):
    """Flatten chat history into "User: ..." / "AI: ..." lines.

    Handles both pair-style entries (``(user_message, ai_message)``) and
    message-style entries (dicts with ``role`` and ``content``).
    """
    lines = []
    for entry in history or []:
        if isinstance(entry, dict):
            speaker = "User" if entry.get("role") == "user" else "AI"
            lines.append(f"{speaker}: {entry.get('content', '')}")
        else:
            lines.append(f"User: {entry[0]}\nAI: {entry[1]}")
    return "\n".join(lines)


def generate_chat_response_with_chatgpt(message, history, student_profile):
    """Produce a conversational reply for the teaching-assistant chat.

    The DeepSeek helper is asked for profile-aware information first; that
    text, the prior conversation and the new message are then combined into a
    single prompt for the OpenAI chat model.

    Args:
        message: The new user message.
        history: Previous turns, as pairs or as role/content dicts; may be
            empty or ``None``.
        student_profile: Profile dict forwarded to
            ``retrieve_information_with_deepseek``.

    Returns:
        The assistant reply text (empty string if the API returned no content).
    """
    accurate_info = retrieve_information_with_deepseek(message, student_profile)
    chat_history = _format_chat_history(history)

    prompt = (
        "\n"
        "You are a personalized teaching assistant. Use the following accurate information to craft a natural, helpful response:\n"
        "[ACCURATE INFORMATION]\n"
        f"{accurate_info}\n"
        "[CONVERSATION HISTORY]\n"
        f"{chat_history}\n"
        "[NEW MESSAGE]\n"
        f"User: {message}\n"
        "Respond in a friendly, conversational tone while ensuring all factual information remains accurate.\n"
    )

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful teaching assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.7
    )
    # ``content`` can be None; the chat UI expects a string.
    return response.choices[0].message.content or ""
# ========== AI TEACHING ASSISTANT TAB ==========
def load_profile():
    """Load the saved student profile from disk.

    Returns:
        The profile dict read from ``student_profiles/student_profile.json``,
        or ``{}`` when the file is missing, unreadable, or does not contain a
        JSON object.
    """
    # NOTE(review): save_profile names its files "<name>_profile.json", so this
    # fixed path may only exist in some cases — confirm which file is intended.
    profile_path = os.path.join("student_profiles", "student_profile.json")
    if not os.path.exists(profile_path):
        return {}
    try:
        with open(profile_path, "r", encoding="utf-8") as f:
            profile = json.load(f)
    except (OSError, json.JSONDecodeError):
        return {}
    return profile if isinstance(profile, dict) else {}


def chat_with_assistant(message, history, transcript, learning_style):
    """Chat callback: build the student profile and delegate to the ChatGPT helper.

    ``transcript`` and ``learning_style`` are the current values of the
    components listed in ``additional_inputs``; when set they override what
    was loaded from the saved profile.
    """
    student_profile = load_profile()
    if transcript:
        student_profile["transcript"] = transcript
    if learning_style:
        student_profile["learning_style"] = learning_style
    return generate_chat_response_with_chatgpt(message, history, student_profile)


# Re-enter the Blocks app built above so the assistant tab is added to it.
# Opening a second gr.Blocks() here would rebind `app` and discard the
# earlier tabs whose components this tab reads from.
with app:
    with gr.Tab("🤖 AI Teaching Assistant"):
        gr.Markdown("## Your Personalized Learning Assistant")
        gr.Markdown("Chat with your AI assistant for personalized learning support")
        chatbot = gr.ChatInterface(
            fn=chat_with_assistant,
            examples=[
                "How should I study for my math test?",
                "Can you explain this concept to me in a way that matches my learning style?",
                "What are some good study strategies based on my GPA?",
                "How can I improve my grades in science?"
            ],
            additional_inputs=[transcript_data, learning_output]
        )

if __name__ == "__main__":
    app.launch()