# Source: Hugging Face Space "Dannyar608/Final_project"
# Space status at capture time: Runtime error
# File size: 19,289 Bytes

import gradio as gr
import pandas as pd
import json
import os
import re
from PyPDF2 import PdfReader
from collections import defaultdict

# ========== TRANSCRIPT PARSING FUNCTIONS ==========
# Grade-level header such as "Grade: 10" or "YEAR Junior", including any
# whitespace/dash separator that follows it.
_GRADE_HEADER_RE = re.compile(
    r"(?:Grade|Year)\s*:?\s*(\d+|Freshman|Sophomore|Junior|Senior)[\s\-]*",
    re.IGNORECASE,
)

# A course at the start of a line, optionally followed by a grade.
# Only spaces/tabs are allowed inside the match so it can never run over
# into the following line.
_COURSE_LINE_RE = re.compile(
    r"""
    (
        [A-Z]{2,}[ \t]?\d{3}                  # course code, e.g. "MATH 101"
        |
        [A-Z][a-z]+(?:[ \t][A-Z][a-z]+)*      # title-cased name, e.g. "World History"
    )
    [ \t]*
    (?: [:\-]?[ \t]* ([A-F][+-]?(?![A-Za-z])|\d{2,3}%) )?
    """,
    re.VERBOSE,
)


def extract_courses_with_grade_levels(text):
    """Group the courses found in transcript text by grade level.

    The text is scanned line by line. A line may start with a grade-level
    header ("Grade 10", "Year: Junior", any letter case), which switches the
    grade level that subsequent courses are filed under. Whatever follows
    the header on that line, or the line itself when there is no header, is
    then checked for a course (a code like ``MATH 101`` or a title-cased
    name) with an optional letter or percentage grade.

    Courses that appear before any header line are filed under the first
    grade level mentioned anywhere in the text, or ``"Unknown"`` when the
    text mentions none.

    Args:
        text: Raw transcript text; may be empty or ``None``.

    Returns:
        A dict mapping grade level (str) to a list of
        ``{"course": ..., "grade": ...}`` dicts; ``"grade"`` is present only
        when a grade was found on the course's line.
    """
    if not text:
        return {}

    first_header = _GRADE_HEADER_RE.search(text)
    current_grade = first_header.group(1) if first_header else "Unknown"

    courses_by_grade = defaultdict(list)
    for raw_line in text.splitlines():
        line = raw_line.strip()

        header = _GRADE_HEADER_RE.match(line)
        if header:
            current_grade = header.group(1)
            # A course may share the line with its header ("Grade 10 - Algebra").
            line = line[header.end():]

        course_match = _COURSE_LINE_RE.match(line)
        if course_match is None:
            continue

        course, grade = course_match.groups()
        course_info = {"course": course}
        if grade:
            course_info["grade"] = grade
        courses_by_grade[current_grade].append(course_info)

    return dict(courses_by_grade)

# (bucket, pattern) pairs for explicitly labelled GPAs. The bucket is stated
# explicitly because "weighted" is a substring of "unweighted", so it cannot
# be recovered from the pattern text. The lookbehind stops "Weighted GPA"
# from matching inside "Unweighted GPA".
_LABELLED_GPA_PATTERNS = (
    ('weighted', r'(?<!un)Weighted GPA[\s:]*(\d\.\d{1,2})'),
    ('weighted', r'GPA \(Weighted\)[\s:]*(\d\.\d{1,2})'),
    ('weighted', r'Cumulative GPA \(Weighted\)[\s:]*(\d\.\d{1,2})'),
    ('unweighted', r'Unweighted GPA[\s:]*(\d\.\d{1,2})'),
    ('unweighted', r'GPA \(Unweighted\)[\s:]*(\d\.\d{1,2})'),
    ('unweighted', r'Cumulative GPA \(Unweighted\)[\s:]*(\d\.\d{1,2})'),
)

# Unlabelled "GPA: 3.5". The lookbehind skips "Weighted GPA" and
# "Unweighted GPA" so a labelled value is never reused for the other bucket.
_GENERIC_GPA_PATTERN = r'(?<!weighted\s)GPA[\s:]*(\d\.\d{1,2})'


def _extract_gpa(text):
    """Return ``{'weighted': ..., 'unweighted': ...}`` found in *text* ("N/A" when absent)."""
    gpa_data = {'weighted': "N/A", 'unweighted': "N/A"}
    for kind, pattern in _LABELLED_GPA_PATTERNS:
        # The last match of the last matching pattern wins.
        for match in re.finditer(pattern, text, re.IGNORECASE):
            gpa_data[kind] = match.group(1)

    generic = re.search(_GENERIC_GPA_PATTERN, text, re.IGNORECASE)
    if generic:
        # An unlabelled GPA only fills buckets that are still empty.
        for kind, value in gpa_data.items():
            if value == "N/A":
                gpa_data[kind] = generic.group(1)
    return gpa_data


def _parse_pdf_transcript(path):
    """Extract grade level, GPA and courses from the text of a PDF transcript."""
    reader = PdfReader(path)
    page_texts = (page.extract_text() for page in reader.pages)
    text = ''.join(page_text + '\n' for page_text in page_texts if page_text)

    grade_match = re.search(
        r'(Grade|Year)[\s:]*(\d+|Freshman|Sophomore|Junior|Senior)', text, re.IGNORECASE
    )
    grade_level = grade_match.group(2) if grade_match else "Unknown"

    gpa_data = _extract_gpa(text)
    courses_by_grade = extract_courses_with_grade_levels(text)

    output_text = f"Grade Level: {grade_level}\n\n"
    if gpa_data['weighted'] != "N/A" or gpa_data['unweighted'] != "N/A":
        output_text += "GPA Information:\n"
        if gpa_data['unweighted'] != "N/A":
            output_text += f"- Unweighted GPA: {gpa_data['unweighted']}\n"
        if gpa_data['weighted'] != "N/A":
            output_text += f"- Weighted GPA: {gpa_data['weighted']}\n"
    else:
        output_text += "No GPA information found\n"

    output_text += "\n(Courses not shown here)"

    return output_text, {
        "gpa": gpa_data,
        "grade_level": grade_level,
        "courses": courses_by_grade
    }


def _first_cell(df, candidates):
    """Return the first-row value of the first candidate column present in *df*.

    The value is converted to a plain Python object (numpy scalars are
    unwrapped) so it can be JSON-serialised later. Returns ``None`` when no
    candidate column exists, the table has no rows, or the cell is missing.
    """
    for col in candidates:
        if col in df.columns:
            if df.empty:
                return None
            value = df[col].iloc[0]
            unwrap = getattr(value, "item", None)
            if callable(unwrap):
                value = unwrap()
            return None if pd.isna(value) else value
    return None


def _parse_table_transcript(df):
    """Extract grade level, GPA and course names from a CSV/XLSX table."""
    gpa_value = _first_cell(df, ['GPA', 'Grade Point Average', 'Cumulative GPA'])
    is_number = isinstance(gpa_value, (int, float)) and not isinstance(gpa_value, bool)
    gpa = gpa_value if is_number else "N/A"

    grade_level = _first_cell(df, ['Grade Level', 'Grade', 'Class', 'Year'])
    if grade_level is None:
        grade_level = "N/A"

    courses = []
    for col in ['Course', 'Subject', 'Course Name', 'Class']:
        if col in df.columns:
            # Blank cells would otherwise be listed as "nan".
            courses = df[col].dropna().tolist()
            break

    output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses:\n"
    output_text += "\n".join(f"- {course}" for course in courses)

    return output_text, {
        "gpa": {"unweighted": gpa, "weighted": "N/A"},
        "grade_level": grade_level,
        "courses": courses
    }


def parse_transcript(file):
    """Parse an uploaded transcript (CSV, XLSX or PDF).

    Args:
        file: The uploaded file: either a filesystem path (``str``) or an
            object whose ``name`` attribute is the path. ``None`` is
            accepted and reported as "no file".

    Returns:
        A ``(summary_text, data)`` tuple. ``data`` is a dict with the keys
        ``"gpa"`` (``{"weighted", "unweighted"}``), ``"grade_level"`` and
        ``"courses"``. For PDFs ``"courses"`` is a dict keyed by grade
        level; for CSV/XLSX it is a flat list of course names. ``data`` is
        ``None`` when there is no file or the format is unsupported.
    """
    if file is None:
        return "No file uploaded", None

    path = file if isinstance(file, str) else getattr(file, "name", None)
    if not path:
        return "Unsupported file format", None

    # Compare extensions case-insensitively so "TRANSCRIPT.PDF" is accepted.
    extension = os.path.splitext(path)[1].lower()
    if extension == '.pdf':
        return _parse_pdf_transcript(path)
    if extension == '.csv':
        return _parse_table_transcript(pd.read_csv(path))
    if extension == '.xlsx':
        return _parse_table_transcript(pd.read_excel(path))
    return "Unsupported file format", None

# ========== LEARNING STYLE QUIZ ==========
# Prompts for the 20-question learning-style quiz. The entry at index i is
# paired with the answer choices at learning_style_options[i].
learning_style_questions = [
    "When you study for a test, you prefer to:",
    "When you need directions to a new place, you prefer:",
    "When you learn a new skill, you prefer to:",
    "When you're trying to concentrate, you:",
    "When you meet new people, you remember them by:",
    "When you're relaxing, you prefer to:",
    "When you're explaining something to someone, you:",
    "When you're trying to remember something, you:",
    "When you're in a classroom, you learn best when:",
    "When you're trying to solve a problem, you:",
    "When you're taking notes, you:",
    "When you're learning new software, you prefer to:",
    "When you're at a museum, you spend the most time:",
    "When you're assembling furniture, you:",
    "When you're learning new vocabulary, you:",
    "When you're giving a presentation, you prefer:",
    "When you're at a party, you enjoy:",
    "When you're taking a break from studying, you:",
    "When you're learning dance moves, you:",
    "When you're choosing a book, you prefer:"
]

# Answer choices for each quiz question, in the same order as
# learning_style_questions. Every choice ends with its learning style in
# parentheses: (Visual), (Auditory), (Reading/Writing) or (Kinesthetic).
# NOTE: the position of a given style within a row is NOT the same for every
# question (compare the first three rows), so a choice's index alone does not
# identify its style.
learning_style_options = [
    ["Read the textbook (Reading/Writing)", "Listen to lectures (Auditory)", "Use diagrams/charts (Visual)", "Practice problems (Kinesthetic)"],
    ["Look at a map (Visual)", "Have someone tell you (Auditory)", "Write down directions (Reading/Writing)", "Try walking/driving there (Kinesthetic)"],
    ["Read instructions (Reading/Writing)", "Have someone show you (Visual)", "Listen to explanations (Auditory)", "Try it yourself (Kinesthetic)"],
    ["Need quiet (Reading/Writing)", "Need background noise (Auditory)", "Need to move around (Kinesthetic)", "Need visual stimulation (Visual)"],
    ["Their face (Visual)", "Their name (Auditory)", "What you talked about (Reading/Writing)", "What you did together (Kinesthetic)"],
    ["Read (Reading/Writing)", "Listen to music (Auditory)", "Watch TV (Visual)", "Do something active (Kinesthetic)"],
    ["Write it down (Reading/Writing)", "Tell them verbally (Auditory)", "Show them (Visual)", "Demonstrate physically (Kinesthetic)"],
    ["See it written down (Visual)", "Say it out loud (Auditory)", "Write it down (Reading/Writing)", "Do it physically (Kinesthetic)"],
    ["Reading materials (Reading/Writing)", "Listening to lectures (Auditory)", "Seeing diagrams (Visual)", "Doing hands-on activities (Kinesthetic)"],
    ["Write down steps (Reading/Writing)", "Talk through it (Auditory)", "Draw diagrams (Visual)", "Try different approaches (Kinesthetic)"],
    ["Write detailed notes (Reading/Writing)", "Record lectures (Auditory)", "Draw mind maps (Visual)", "Take minimal notes (Kinesthetic)"],
    ["Read the manual (Reading/Writing)", "Have someone explain it (Auditory)", "Watch tutorial videos (Visual)", "Just start using it (Kinesthetic)"],
    ["Reading descriptions (Reading/Writing)", "Listening to audio guides (Auditory)", "Looking at exhibits (Visual)", "Interactive displays (Kinesthetic)"],
    ["Read instructions first (Reading/Writing)", "Ask someone to help (Auditory)", "Look at diagrams (Visual)", "Start assembling (Kinesthetic)"],
    ["Write them repeatedly (Reading/Writing)", "Say them repeatedly (Auditory)", "Use flashcards (Visual)", "Use them in conversation (Kinesthetic)"],
    ["Having detailed notes (Reading/Writing)", "Speaking freely (Auditory)", "Using visual aids (Visual)", "Demonstrating something (Kinesthetic)"],
    ["Conversations (Auditory)", "People-watching (Visual)", "Dancing/games (Kinesthetic)", "Reading about people (Reading/Writing)"],
    ["Read for fun (Reading/Writing)", "Listen to music (Auditory)", "Watch videos (Visual)", "Exercise (Kinesthetic)"],
    ["Watch demonstrations (Visual)", "Listen to instructions (Auditory)", "Read choreography (Reading/Writing)", "Try the moves (Kinesthetic)"],
    ["Text-heavy books (Reading/Writing)", "Audiobooks (Auditory)", "Books with pictures (Visual)", "Interactive books (Kinesthetic)"]
]

# Captures the learning style named in the trailing parentheses of an answer,
# e.g. "Look at a map (Visual)" -> "Visual".
_STYLE_TAG_RE = re.compile(r"\(([^()]+)\)\s*$")


def learning_style_quiz(*answers):
    """Score the learning-style quiz and describe the result.

    Each answer is credited to the style named in its trailing parentheses
    rather than to its position among the choices, because the choices are
    not listed in the same style order for every question.

    Args:
        *answers: The selected choice text for each question. Unanswered
            questions (``None``) and answers without a recognised style tag
            are ignored.

    Returns:
        A multi-line string naming the dominant style (or styles, on a tie)
        followed by the per-style scores out of the number of questions.
        If no answer could be scored, a short prompt to complete the quiz
        is returned instead.
    """
    scores = {
        "Visual": 0,
        "Auditory": 0,
        "Reading/Writing": 0,
        "Kinesthetic": 0
    }

    for answer in answers:
        if not isinstance(answer, str):
            continue  # unanswered question
        tag = _STYLE_TAG_RE.search(answer)
        if tag and tag.group(1) in scores:
            scores[tag.group(1)] += 1

    max_score = max(scores.values())
    if max_score == 0:
        # Without this guard every style would be reported as "dominant".
        return "No answers recorded. Please answer the quiz questions to see your learning style."

    dominant_styles = [style for style, score in scores.items() if score == max_score]
    if len(dominant_styles) == 1:
        result = f"Your primary learning style is: {dominant_styles[0]}"
    else:
        result = f"You have multiple strong learning styles: {', '.join(dominant_styles)}"

    total_questions = len(answers)
    result += "\n\nDetailed Scores:\n"
    for style, score in sorted(scores.items(), key=lambda item: item[1], reverse=True):
        result += f"{style}: {score}/{total_questions}\n"

    return result

# ========== SAVE STUDENT PROFILE FUNCTION ==========
def _profile_json_default(value):
    """Convert numpy-style scalars (anything exposing ``.item()``) for ``json.dump``."""
    unwrap = getattr(value, "item", None)
    if callable(unwrap):
        return unwrap()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def save_profile(name, age, interests, transcript, learning_style, favorites, blog):
    """Write the student profile to disk and return a Markdown summary.

    The profile is saved as ``student_profiles/<name>_profile.json``. The
    name is reduced to letters, digits, ``_`` and ``-`` before it is used in
    the path, so user input cannot point outside the profile directory; an
    empty name falls back to ``student``.

    Args:
        name: Student name (free text; may be empty or ``None``).
        age: Student age.
        interests: Free-text interests.
        transcript: Parsed transcript data, or ``None``.
        learning_style: Quiz result text.
        favorites: Dict with the keys ``movie``, ``show``, ``book``,
            ``character`` and a matching ``*_reason`` key for each.
        blog: Optional blog text.

    Returns:
        A Markdown string summarising the profile.
    """
    data = {
        "name": name,
        "age": age,
        "interests": interests,
        "transcript": transcript,
        "learning_style": learning_style,
        "favorites": favorites,
        "blog": blog
    }

    # Collapse anything that is not a word character or hyphen (spaces, path
    # separators, dots, ...) into underscores before building the file name.
    file_stem = re.sub(r"[^\w\-]+", "_", (name or "").strip()).strip("_") or "student"

    os.makedirs("student_profiles", exist_ok=True)
    json_path = os.path.join("student_profiles", f"{file_stem}_profile.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False, default=_profile_json_default)

    markdown_summary = f"""### Student Profile: {name}
**Age:** {age}  
**Interests:** {interests}  
**Learning Style:** {learning_style}  
#### Transcript:
{transcript_display(transcript)}
#### Favorites:
- Movie: {favorites['movie']} ({favorites['movie_reason']})
- Show: {favorites['show']} ({favorites['show_reason']})
- Book: {favorites['book']} ({favorites['book_reason']})
- Character: {favorites['character']} ({favorites['character_reason']})
#### Blog:
{blog if blog else "_No blog provided_"}
"""
    return markdown_summary

def _format_courses_by_grade(courses_by_grade):
    """Render ``{grade_level: [course, ...]}`` as Markdown sections with bullet lists."""
    lines = []
    for grade_level, courses in courses_by_grade.items():
        lines.append(f"\n**Grade {grade_level}**")
        for course in courses:
            if isinstance(course, dict):
                entry = f"- {course.get('course', 'Unknown course')}"
                if 'grade' in course:
                    entry += f" (Grade: {course['grade']})"
            else:
                entry = f"- {course}"
            lines.append(entry)
    return "\n".join(lines) + "\n" if lines else ""


def transcript_display(transcript_dict):
    """Render parsed transcript data as Markdown text.

    Two input shapes are accepted:

    * a transcript record with a ``"courses"`` key (plus optional
      ``"grade_level"`` and ``"gpa"``), where ``"courses"`` is either a flat
      list of course names or a dict of course lists keyed by grade level;
    * a bare dict of course lists keyed by grade level.

    Args:
        transcript_dict: Transcript data in one of the shapes above, or a
            falsy value when nothing was uploaded.

    Returns:
        The Markdown text, or ``"No transcript uploaded."`` for falsy input.
    """
    if not transcript_dict:
        return "No transcript uploaded."
    if not isinstance(transcript_dict, dict):
        return str(transcript_dict)

    if "courses" in transcript_dict:
        gpa = transcript_dict.get("gpa", "N/A")
        if isinstance(gpa, dict):
            # Spell the two values out instead of printing the dict repr.
            gpa = f"{gpa.get('unweighted', 'N/A')} (unweighted), {gpa.get('weighted', 'N/A')} (weighted)"
        summary = [
            f"Grade Level: {transcript_dict.get('grade_level', 'N/A')}",
            f"GPA: {gpa}",
        ]
        courses = transcript_dict.get("courses") or []
        if isinstance(courses, dict):
            # Courses grouped by grade: iterate the groups, not the dict keys.
            return _format_courses_by_grade(courses) + "\n".join(summary)
        return "\n".join([f"- {course}" for course in courses] + summary)

    if all(isinstance(v, list) for v in transcript_dict.values()):
        return _format_courses_by_grade(transcript_dict)

    return str(transcript_dict)

# ========== GRADIO INTERFACE ==========
# Builds the four-step profile UI. Components are created in display order,
# so the order of the statements below determines the page layout.
with gr.Blocks() as app:
    with gr.Tab("Step 1: Upload Transcript"):
        transcript_file = gr.File(label="Upload your transcript (CSV, Excel, or PDF)")
        transcript_output = gr.Textbox(label="Transcript Output")
        # Holds the parsed transcript data returned by parse_transcript so the
        # save step can read it later.
        transcript_data = gr.State()
        # Re-parse whenever the uploaded file changes: the summary text goes to
        # the textbox, the data to the state.
        transcript_file.change(fn=parse_transcript, inputs=transcript_file, outputs=[transcript_output, transcript_data])

    with gr.Tab("Step 2: Learning Style Quiz"):
        gr.Markdown("### Complete this 20-question quiz to determine your learning style")
        # One radio group per question; the list order is the order in which
        # the answers are passed to learning_style_quiz.
        quiz_components = []
        for i, (question, options) in enumerate(zip(learning_style_questions, learning_style_options)):
            quiz_components.append(
                gr.Radio(choices=options, label=f"{i+1}. {question}")
            )

        learning_output = gr.Textbox(label="Learning Style Result", lines=5)
        gr.Button("Submit Quiz").click(
            learning_style_quiz,
            inputs=quiz_components,
            outputs=learning_output
        )

    with gr.Tab("Step 3: Personal Questions"):
        name = gr.Textbox(label="What's your name?")
        age = gr.Number(label="How old are you?")
        interests = gr.Textbox(label="What are your interests?")
        movie = gr.Textbox(label="Favorite movie?")
        movie_reason = gr.Textbox(label="Why do you like that movie?")
        show = gr.Textbox(label="Favorite TV show?")
        show_reason = gr.Textbox(label="Why do you like that show?")
        book = gr.Textbox(label="Favorite book?")
        book_reason = gr.Textbox(label="Why do you like that book?")
        character = gr.Textbox(label="Favorite character?")
        character_reason = gr.Textbox(label="Why do you like that character?")
        blog_checkbox = gr.Checkbox(label="Do you want to write a blog?", value=False)
        blog_text = gr.Textbox(label="Write your blog here", visible=False, lines=5)
        # Show the blog textbox only while the checkbox is ticked.
        blog_checkbox.change(fn=lambda x: gr.update(visible=x), inputs=blog_checkbox, outputs=blog_text)

    with gr.Tab("Step 4: Save & Review"):
        output_summary = gr.Markdown()
        save_btn = gr.Button("Save Profile")

        def gather_and_save(name, age, interests, movie, movie_reason, show, show_reason,
                          book, book_reason, character, character_reason, blog, transcript, learning_style):
            """Bundle the favorites fields into one dict and delegate to save_profile.

            The parameters arrive in the order of the ``inputs`` list passed to
            ``save_btn.click`` below. Returns the Markdown summary produced by
            save_profile.
            """
            favorites = {
                "movie": movie,
                "movie_reason": movie_reason,
                "show": show,
                "show_reason": show_reason,
                "book": book,
                "book_reason": book_reason,
                "character": character,
                "character_reason": character_reason,
            }
            return save_profile(name, age, interests, transcript, learning_style, favorites, blog)

        # The learning style passed along is the text currently shown in the
        # quiz result textbox.
        save_btn.click(fn=gather_and_save,
                     inputs=[name, age, interests, movie, movie_reason, show, show_reason,
                            book, book_reason, character, character_reason, blog_text,
                            transcript_data, learning_output],
                     outputs=output_summary)
# NOTE: these imports belong at the top of the file, grouped with the other imports.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from openai import OpenAI  # Make sure to install with pip install openai

# ========== AI CHATBOT SETUP ==========
# DeepSeek checkpoint used for information retrieval. Set DEEPSEEK_MODEL_NAME
# to use a different checkpoint without editing the code.
# NOTE(review): confirm that the default repo id below exists on the Hugging
# Face Hub; from_pretrained raises at import time if it cannot be resolved.
deepseek_model_name = os.environ.get("DEEPSEEK_MODEL_NAME", "deepseek-ai/deepseek-llm-7b")
deepseek_tokenizer = AutoTokenizer.from_pretrained(deepseek_model_name)
deepseek_model = AutoModelForCausalLM.from_pretrained(deepseek_model_name, torch_dtype=torch.float16)

# ChatGPT client. The API key is read from the OPENAI_API_KEY environment
# variable so the secret never has to be committed with the source. The
# placeholder fallback keeps the module importable without a key; requests
# made with it will be rejected by the API.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-openai-api-key"))

def retrieve_information_with_deepseek(query, student_profile):
    """Ask the DeepSeek model to answer *query* in the context of a student profile.

    Args:
        query: The student's question.
        student_profile: Profile dict (keys such as ``name``, ``age``,
            ``transcript``, ``learning_style``, ``interests``). Missing keys,
            a ``None`` transcript, or a non-dict profile are treated as
            "N/A" rather than raising.

    Returns:
        The text generated by the model, without the prompt that was fed in.
    """
    profile = student_profile if isinstance(student_profile, dict) else {}

    # A saved profile may carry `"transcript": null`, so .get() defaults are
    # not enough to guarantee a dict here.
    transcript = profile.get('transcript')
    if not isinstance(transcript, dict):
        transcript = {}
    gpa = transcript.get('gpa')
    if isinstance(gpa, dict):
        unweighted_gpa = gpa.get('unweighted', 'N/A')
    else:
        unweighted_gpa = 'N/A' if gpa is None else gpa

    # Prepare context from student profile
    profile_context = f"""
    Student Profile:
    Name: {profile.get('name', 'N/A')}
    Age: {profile.get('age', 'N/A')}
    Grade Level: {transcript.get('grade_level', 'N/A')}
    GPA: {unweighted_gpa} (Unweighted)
    Learning Style: {profile.get('learning_style', 'N/A')}
    Interests: {profile.get('interests', 'N/A')}
    """

    # Format the prompt for DeepSeek
    prompt = f"""
    [CONTEXT]
    {profile_context}
    
    [QUERY]
    {query}
    
    Based on the student profile and educational context, provide the most accurate and relevant information to answer the query.
    """

    # Tokenize on the same device the model lives on.
    inputs = deepseek_tokenizer(prompt, return_tensors="pt").to(deepseek_model.device)
    with torch.no_grad():
        outputs = deepseek_model.generate(**inputs, max_new_tokens=200)

    # generate() returns the prompt tokens followed by the continuation for a
    # causal LM; decode only the newly generated part.
    prompt_length = inputs["input_ids"].shape[-1]
    accurate_response = deepseek_tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )

    return accurate_response.strip()

def _format_chat_history(history):
    """Flatten chat history into "User: ..." / "AI: ..." lines.

    Accepts pair-style history (``[user_text, ai_text]`` per turn) as well as
    message-style history (``{"role": ..., "content": ...}`` per entry).
    """
    lines = []
    for turn in history or []:
        if isinstance(turn, dict):
            speaker = "User" if turn.get("role") == "user" else "AI"
            lines.append(f"{speaker}: {turn.get('content') or ''}")
        else:
            lines.append(f"User: {turn[0]}\nAI: {turn[1]}")
    return "\n".join(lines)


def generate_chat_response_with_chatgpt(message, history, student_profile):
    """Produce the assistant's reply to *message*.

    The DeepSeek model is queried first for profile-aware information; that
    text, the conversation so far and the new message are then handed to
    ChatGPT, which writes the final reply.

    Args:
        message: The new user message.
        history: Previous turns, either as ``[user, ai]`` pairs or as
            ``{"role", "content"}`` dicts; may be empty or ``None``.
        student_profile: Profile dict forwarded to the DeepSeek step.

    Returns:
        The reply text (an empty string if the API returned no content).
    """
    # First retrieve accurate information with DeepSeek
    accurate_info = retrieve_information_with_deepseek(message, student_profile)

    # Prepare conversation history
    chat_history = _format_chat_history(history)

    # Create ChatGPT prompt
    prompt = f"""
    You are a personalized teaching assistant. Use the following accurate information to craft a natural, helpful response:
    
    [ACCURATE INFORMATION]
    {accurate_info}
    
    [CONVERSATION HISTORY]
    {chat_history}
    
    [NEW MESSAGE]
    User: {message}
    
    Respond in a friendly, conversational tone while ensuring all factual information remains accurate.
    """

    # Get response from ChatGPT
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful teaching assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.7
    )

    # `content` can be None; the chat UI expects a string.
    return response.choices[0].message.content or ""

# ========== AI TEACHING ASSISTANT TAB ==========
def load_profile():
    """Return the saved profile from student_profiles/student_profile.json, or {} if absent."""
    profile_path = os.path.join("student_profiles", "student_profile.json")
    if os.path.exists(profile_path):
        with open(profile_path, "r") as f:
            return json.load(f)
    return {}


def chat_with_assistant(message, history, transcript=None, learning_style=None, saved_profile=None):
    """Chat handler: build the student profile for this session and answer *message*.

    The profile starts from whatever was loaded from disk and is overridden
    by the transcript and learning-style result currently held in the UI.
    The extra parameters default to ``None`` so the handler can also be
    called with only ``(message, history)``.
    """
    student_profile = dict(saved_profile) if isinstance(saved_profile, dict) else {}
    if transcript:
        student_profile["transcript"] = transcript
    if learning_style:
        student_profile["learning_style"] = learning_style
    return generate_chat_response_with_chatgpt(message, history, student_profile)


# Re-enter the existing Blocks instead of creating a new one: a fresh
# `gr.Blocks() as app` would rebind `app`, drop the tabs built above, and leave
# `transcript_data` / `learning_output` belonging to a different Blocks.
# NOTE(review): a tab added on re-entry may be rendered in its own tab bar
# below the first one - confirm the layout in the running app.
with app:
    with gr.Tab("🤖 AI Teaching Assistant"):
        gr.Markdown("## Your Personalized Learning Assistant")
        gr.Markdown("Chat with your AI assistant for personalized learning support")

        # Receives the profile loaded from disk when the page opens.
        saved_profile_state = gr.State({})

        chatbot = gr.ChatInterface(
            fn=chat_with_assistant,
            examples=[
                "How should I study for my math test?",
                "Can you explain this concept to me in a way that matches my learning style?",
                "What are some good study strategies based on my GPA?",
                "How can I improve my grades in science?"
            ],
            # Do not pre-compute example answers at startup: that would run
            # both models before any student data exists.
            cache_examples=False,
            additional_inputs=[transcript_data, learning_output, saved_profile_state]
        )

    # This connects the saved profile data to the chatbot
    app.load(fn=load_profile, inputs=None, outputs=saved_profile_state)

if __name__ == "__main__":
    app.launch()