# Source: Hugging Face Space "helliun/metaknowledge" (status: Sleeping).
# File size: 15,138 bytes.

import pandas as pd
from openai import OpenAI
import json
import gradio as gr

# Shared OpenAI client used by generate_questions and analyze_results.
# No credentials are passed here -- presumably the SDK picks them up from the
# environment (e.g. OPENAI_API_KEY); confirm for the deployment.
client = OpenAI()

# Example reply embedded in the quiz prompt. It is written as literal JSON
# (lowercase true/false) so the model is shown the same boolean syntax that
# json.loads expects back. The trailing "{ ..." deliberately cuts the example
# short.
_QUIZ_EXAMPLE_JSON = (
    "```json\n"
    "{\n"
    "  \"Macronutrients\": [\n"
    "    {\n"
    "      \"question\": \"Protein is one of the three primary macronutrients.\",\n"
    "      \"answer\": true,\n"
    "      \"explanation\": \"Protein is one of the three primary macronutrients, along with carbohydrates and fats.\"\n"
    "    },\n"
    "    {\n"
    "      \"question\": \"Carbohydrates are the body's main source of energy.\",\n"
    "      \"answer\": true,\n"
    "      \"explanation\": \"Carbohydrates are typically the body's preferred energy source.\"\n"
    "    },\n"
    "    {\n"
    "      \"question\": \"Fats have the same caloric content per gram as carbohydrates.\",\n"
    "      \"answer\": false,\n"
    "      \"explanation\": \"Fats have 9 calories per gram, while carbohydrates have 4 calories per gram.\"\n"
    "    },\n"
    "    {\n"
    "      \"question\": \"All proteins are equally effective for muscle growth.\",\n"
    "      \"answer\": false,\n"
    "      \"explanation\": \"Different proteins have varying amino acid profiles and bioavailability, affecting their effectiveness.\"\n"
    "    },\n"
    "    {\n"
    "      \"question\": \"Omega-3 fatty acids are a type of fat that can reduce inflammation.\",\n"
    "      \"answer\": true,\n"
    "      \"explanation\": \"Omega-3 fatty acids, found in foods like fish, are known to have anti-inflammatory properties.\"\n"
    "    }\n"
    "  ],\n"
    "  \"Micronutrients\": [\n"
    "    { ...\"}"
)


def generate_questions(category, num_categories, num_questions):
    """Ask the chat model for a True/False quiz on ``category``.

    Args:
        category: Topic of the quiz. ``None``, an empty string or a
            whitespace-only string falls back to ``"general knowledge"``.
        num_categories: Number of subcategories to request. Coerced with
            ``int()`` so a float such as ``5.0`` is rendered as ``5`` in the
            prompt.
        num_questions: Number of questions per subcategory, coerced likewise.

    Returns:
        The model's reply parsed with ``json.loads``. The prompt asks for a
        mapping of subcategory name to a list of objects with ``question``,
        ``answer`` and ``explanation`` keys; the reply is not validated here.

    Raises:
        TypeError, ValueError: If a count cannot be converted by ``int()``.
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    if not category or not category.strip():
        category = "general knowledge"
    num_categories = int(num_categories)
    num_questions = int(num_questions)
    total_questions = num_categories * num_questions
    print(category)

    prompt = (
        f"Break the category \"{category}\" into {num_categories} subcategories, "
        f"and for each subcategory create {num_questions} True/False questions "
        "ranging from a question a Beginner would know to a question only an "
        "Expert would know. There should be as many True as False, and the "
        "structure of the questions should not make it obvious which is the "
        "answer. Only experts should get the hard questions right. Provide the "
        "correct answers and a field with a 1 sentence explanation. "
        f"This will total out to {total_questions} questions. "
        "Output just a JSON, nothing else. Below is an example JSON output for "
        "\"nutrition\" where 6 categories of 5 questions were requested, but "
        "remember, for you, there should be a total of "
        f"{total_questions} questions with {num_categories} categories and "
        f"{num_questions} questions per category.\n\n"
    ) + _QUIZ_EXAMPLE_JSON

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    }
                ]
            }
        ],
        response_format={"type": "json_object"},
        temperature=1,
        max_tokens=4071,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return json.loads(response.choices[0].message.content)

# Metaknowledge score: confidence-weighted correctness rescaled around 0.5
def calculate_meta_cog_score(df):
    """Return 0.5 plus half the mean of signed confidence.

    Each row contributes ``+Confidence`` when ``User Answer`` equals
    ``Correct Answer`` and ``-Confidence`` otherwise.

    Side effect: writes the helper columns ``Correct`` (bool) and ``C``
    (+1 / -1) onto ``df``.
    """
    df['Correct'] = df['User Answer'] == df['Correct Answer']
    df['C'] = df['Correct'].apply(lambda hit: 1 if hit else -1)
    signed_confidence_total = (df['C'] * df['Confidence']).sum()
    row_count = len(df)
    return 0.5 + (signed_confidence_total / (2 * row_count))

def display_current_question(questions, index):
    """Return the display tuple for the question at ``index``.

    The tuple is ``(markdown_text, None, None, visible)``. Once ``index`` has
    reached ``len(questions)`` the text is empty and ``visible`` is False.
    """
    if index >= len(questions):
        return ("", None, None, False)

    prompt = questions[index]['question']
    return (f"**Question {index + 1}:** {prompt}", None, None, True)

def calculate_scores(df):
    """Summarise accuracy and confidence calibration for a set of answers.

    Args:
        df: DataFrame with ``User Answer``, ``Correct Answer`` and
            ``Confidence`` columns. Confidence is treated as the expected
            score of a row and compared against the actual score of 1
            (correct) or 0 (incorrect).

    Returns:
        A dict with ``Accuracy``, ``Overconfidence`` and ``Underconfidence``,
        each formatted as a percentage string such as ``"50.0%"``.

    Side effect: writes the columns ``Correct``, ``C``, ``Expected Score``,
    ``Actual Score``, ``Overconfidence`` and ``Underconfidence`` onto ``df``.
    """
    df['Correct'] = df['User Answer'] == df['Correct Answer']
    df['C'] = df['Correct'].astype(int)

    # Expected score is the stated confidence; actual score is 1 or 0.
    df['Expected Score'] = df['Confidence']
    df['Actual Score'] = df['C']

    # Only the positive part of each gap counts: confidence above the actual
    # score is overconfidence, confidence below it is underconfidence.
    gap = df['Expected Score'] - df['Actual Score']
    df['Overconfidence'] = gap.clip(lower=0)
    df['Underconfidence'] = (-gap).clip(lower=0)

    n = len(df)
    accuracy = df['Correct'].mean()
    overconfidence = df['Overconfidence'].sum() / n
    underconfidence = df['Underconfidence'].sum() / n

    return {
        'Accuracy': f"{round(accuracy * 100, 0)}%",
        'Overconfidence': f"{round(overconfidence * 100, 0)}%",
        'Underconfidence': f"{round(underconfidence * 100, 0)}%"
    }

def _render_question_table(questions, user_answers, correct_answers, explanations):
    """Build the HTML table of questions, answers and explanations.

    Rows where the user's answer differs from the correct one are bolded.
    Question and explanation text comes from the model, so it is HTML-escaped
    before being placed in a cell.
    """
    import html  # local import: keeps this block self-contained

    header = (
        "<table><thead><tr>"
        "<th><b><span style='font-size:16px'>Question</span></b></th>"
        "<th><b><span style='font-size:16px'>User Answer</span></b></th>"
        "<th><b><span style='font-size:16px'>Correct Answer</span></b></th>"
        "<th><b><span style='font-size:16px'>Explanation</span></b></th>"
        "</tr></thead><tbody>"
    )

    rows = []
    for q, ua, ca, e in zip(questions, user_answers, correct_answers, explanations):
        cells = [
            # quote=False: this is element content, so quotes can stay as-is.
            html.escape(str(q), quote=False),
            'True' if ua else 'False',
            'True' if ca else 'False',
            html.escape(str(e), quote=False),
        ]
        if ua != ca:
            cells = [f"<b>{cell}</b>" for cell in cells]
        rows.append("<tr>" + "".join(f"<td>{cell}</td>" for cell in cells) + "</tr>")

    return header + "".join(rows) + "</tbody></table>"


# Function to analyze results using GPT-4o-mini
def analyze_results(df, overall_scores, subcategory_scores):
    """Produce the end-of-quiz report as a Markdown/HTML string.

    Args:
        df: Answer log with ``Question``, ``Correct Answer``, ``User Answer``,
            ``Explanation``, ``Confidence`` and ``Section`` columns.
        overall_scores: Dict with ``Accuracy``, ``Overconfidence`` and
            ``Underconfidence`` entries for the whole quiz.
        subcategory_scores: Per-section scores; interpolated into the prompt
            as-is.

    Returns:
        A string with a model-written analysis followed by an HTML table of
        every question, the user's answer, the correct answer and the
        explanation.
    """
    # Prepare the data for analysis
    questions = df['Question'].tolist()
    correct_answers = df['Correct Answer'].tolist()
    user_answers = df['User Answer'].tolist()
    explanations = df['Explanation'].tolist()
    confidence = df['Confidence'].tolist()
    subcategories = df['Section'].tolist()

    # Generate a summary of the results
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": f"""
                Analyze the following quiz results:
                - Overall Accuracy: {overall_scores['Accuracy']}
                - Overall Overconfidence: {overall_scores['Overconfidence']}
                - Overall Underconfidence: {overall_scores['Underconfidence']}

                Section scores:
                {subcategory_scores}

                The following is a list of my answers and confidence levels for each question, with the correct answers and subcategory:
                {list(zip(questions, user_answers, correct_answers, explanations, confidence, subcategories))}

                Provide an analysis of what I got wrong in terms of overall sections and specific questions, as well as what I was overconfident and underconfident in. Don't use numbers, as they're already displayed elsewhere.
                The analysis should be only about 2 paragraphs. Write the subcategory names in bold when you use them.
                """
            }
        ],
        temperature=0.7,
        max_tokens=1024,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    analysis = response.choices[0].message.content
    question_details = _render_question_table(
        questions, user_answers, correct_answers, explanations
    )

    return f"## Analysis of Results\n\n{analysis}\n\n## Detailed Questions and Answers\n\n{question_details}"

def _coerce_answer(value):
    """Interpret a model-supplied answer as a bool, accepting "true"/"false" strings."""
    if isinstance(value, str):
        return value.strip().lower() == "true"
    return bool(value)


# Record one answer; after the last question, score the quiz and build the analysis
def submit_answer(category, num_categories, num_questions, questions, index, user_answer, confidence, user_answers):
    """Record the answer to the current question and advance the quiz.

    Args:
        category, num_categories, num_questions, user_answer: Received from
            the UI wiring but not used here.
        questions: List of question dicts with ``question``, ``answer``,
            ``explanation`` and ``subcategory`` keys.
        index: Position of the question being answered.
        confidence: Slider position in [0, 1]. Above 0.5 is recorded as an
            answer of True, otherwise False; the distance from 0.5, scaled to
            [0, 1], is recorded as the confidence.
        user_answers: Running list of answer records; appended to in place.

    Returns:
        A 9-tuple of values/updates for, in order: question text, index,
        submit button, answer log, results table, confidence slider, answer
        radio, analysis area and slider guides.
    """
    question_data = questions[index]

    user_answers.append({
        "Question": question_data["question"],
        "Explanation": question_data["explanation"],
        "User Answer": confidence > 0.5,
        # The answer comes from model-generated JSON and may be a string
        # rather than a JSON boolean; normalise it so the equality check and
        # the True/False rendering downstream see a real bool.
        "Correct Answer": _coerce_answer(question_data["answer"]),
        "Confidence": 2*abs(confidence-0.5),
        "Section": question_data["subcategory"]
    })
    index += 1

    if index < len(questions):
        question_text, _, _, _ = display_current_question(questions, index)
        return question_text, index, gr.update(visible=True), user_answers, gr.update(visible=False), gr.update(visible=True, value=0.5), gr.update(visible=False, value=None), gr.update(visible=False), gr.update(visible=True)

    # Last question answered: score the quiz and build the report.
    df = pd.DataFrame(user_answers)
    overall_scores = calculate_scores(df)
    # calculate_scores adds columns to the frame it is given, and pandas does
    # not support mutating UDFs in groupby.apply, so each section is scored on
    # its own copy instead.
    subcategory_scores = {
        section: calculate_scores(group.copy())
        for section, group in df.groupby('Section')
    }
    analysis = analyze_results(df, overall_scores, subcategory_scores)

    score_columns = ['Section', 'Accuracy', 'Overconfidence', 'Underconfidence']
    overall_score_df = pd.DataFrame([["Overall", *overall_scores.values()]], columns=score_columns)
    subcategory_scores_df = pd.DataFrame(
        [(subcategory, *score.values()) for subcategory, score in subcategory_scores.items()],
        columns=score_columns,
    )
    results_df = pd.concat([overall_score_df, subcategory_scores_df], ignore_index=True)
    results_df = gr.DataFrame(label="Results", value=results_df, visible=True)
    return "", index, gr.update(visible=False), user_answers, results_df, gr.update(visible=False), gr.update(visible=False), gr.update(value=analysis, visible=True), gr.update(visible=False)

# Gradio UI setup: declares every component and the per-session state. The
# quiz widgets start hidden and are revealed by the event handlers.
with gr.Blocks(theme="soft", css="footer{display:none !important}") as app:
    # with gr.Row():
        # gr.Markdown("""## &nbsp; Deep Quizzer <img src="file/Subject.png" img align="left" width="20" height="20" />""")
        # gr.Markdown("""<img src="file/Subject.png" img align="left" width="40" height="40" />""")
    gr.Markdown("""Discover what you truly know and ***how aware you are of your knowledge***. Deep Quizzer identifies gaps in your understanding and helps boost your confidence in areas you excel. Take a quiz to sharpen your skills and knowledge today!""")
    # Quiz configuration row: topic, section count, questions per section and
    # the derived total.
    with gr.Row():
      category_input = gr.Textbox(label="Topic", placeholder="general knowledge", scale=4)
      num_categories_input = gr.Number(label="Number of Sections", value=5, scale=1, maximum=6)
      num_questions_input = gr.Number(label="Questions per Section", value=5, scale=1, maximum=6)
      total_questions_display = gr.Number(label="Total Questions in Quiz", value=25, scale=1)
    submit_category = gr.Button("Generate Quiz")
    # Shows the current question as Markdown.
    question_area = gr.Markdown(visible=False)
    # The handlers in this file only ever keep this radio hidden; the answer
    # is derived from the slider position instead.
    answer_area = gr.Radio(["True", "False"], label="Your Answer", visible=False)
    with gr.Column():
        # One control for answer and confidence: 0 = False, 1 = True, 0.5 = not sure.
        confidence_slider = gr.Slider(0, 1, value=0.5, visible=False, container=False)#, label="Confidence Level",)
        # Text labels laid out in a row beneath the slider.
        with gr.Row(visible=False) as guides:
            gr.Markdown('<p style="text-align: left;">False</p>')
            gr.Markdown('<p style="text-align: left;">Maybe False</p>')
            gr.Markdown('<p style="text-align: center;">Not Sure</p>')
            gr.Markdown('<p style="text-align: right;">Maybe True</p>')
            gr.Markdown('<p style="text-align: right;">True</p>')

    submit_answer_btn = gr.Button("Submit Answer", visible=False)
    # Score table, filled in after the last question.
    result_area = gr.DataFrame(label="Results", visible=False)
    # Textbox used as a loading indicator while the quiz is generated.
    loading_text = gr.Textbox(label="Generating Quiz...", visible=False)
    # Receives the written analysis and question table after the last question.
    analysis_area = gr.Markdown(visible=False)
    # State: the question list, the current index and the answer log.
    questions_state = gr.State()
    index_state = gr.State(0)
    user_answers_state = gr.State([])

    def on_generate_quiz(category, num_categories, num_questions):
        """Generate a quiz, shuffle it and show the first question.

        Returns a 12-tuple matching the outputs wired to this handler:
        question text, question list, index (0), an empty answer log, then
        visibility updates for the remaining components.
        """
        import random

        quiz_by_section = generate_questions(category, num_categories, num_questions)

        # Flatten the per-section lists into one list, tagging every question
        # with the section it came from.
        questions = []
        for section, entries in quiz_by_section.items():
            for entry in entries:
                entry["subcategory"] = section
            questions.extend(entries)

        random.shuffle(questions)
        print(len(questions))

        index = 0
        first_question = display_current_question(questions, index)
        question_text, visible = first_question[0], first_question[3]

        shown = gr.update(visible=True)
        hidden = gr.update(visible=False)
        return (
            gr.update(value=question_text, visible=visible),
            questions,
            index,
            [],
            gr.update(visible=visible),
            gr.update(visible=False, value=None),
            gr.update(visible=True, value=0.5),
            shown,
            hidden,
            hidden,
            hidden,
            shown
        )

    def remove_button():
        """Return an update that hides the component it is wired to."""
        hide = gr.update(visible=False)
        return hide

    def display_loading():
        """Return an update that shows the component it is wired to."""
        show = gr.update(visible=True)
        return show

    def update_total_questions(num_categories, num_questions):
        """Return an update setting the total to sections x questions per section.

        A cleared Number field is expected to arrive as ``None``; it is
        counted as zero rather than raising ``TypeError`` on the
        multiplication.
        """
        total_questions = (num_categories or 0) * (num_questions or 0)
        return gr.update(value=total_questions)

    def display_results(index, questions):
        """Reveal the wired component once ``index`` has passed the last question.

        While the quiz is still running (or no quiz exists yet) this returns
        a no-op update. Falling through and returning ``None`` would instead
        write ``None`` into the output component.
        """
        if questions is not None and index >= len(questions):
            return gr.update(visible=True)
        return gr.update()

    # Recompute the total-questions display whenever either count changes.
    num_categories_input.change(update_total_questions, inputs=[num_categories_input, num_questions_input], outputs=[total_questions_display])
    num_questions_input.change(update_total_questions, inputs=[num_categories_input, num_questions_input], outputs=[total_questions_display])

    # "Generate Quiz" has several handlers bound to it. These two hide the
    # button itself and reveal the loading textbox.
    submit_category.click(remove_button, inputs=[], outputs=[submit_category])
    submit_category.click(display_loading, inputs=[], outputs=[loading_text])

    def make_uninteractive():
        """Return three updates: disable the first target, disable and hide the other two."""
        disable_only = gr.update(interactive=False)
        disable_and_hide_first = gr.update(interactive=False, visible=False)
        disable_and_hide_second = gr.update(interactive=False, visible=False)
        return (disable_only, disable_and_hide_first, disable_and_hide_second)

    # On "Generate Quiz": disable the topic box and disable + hide both count inputs.
    submit_category.click(
        make_uninteractive,
        inputs=[],
        outputs=[category_input, num_categories_input, num_questions_input]
    )

    # On "Generate Quiz": build the quiz and show the first question. The
    # outputs line up one-to-one with the 12-tuple returned by
    # on_generate_quiz; question_area appears twice (once for its text and
    # visibility, once for visibility only).
    submit_category.click(
        on_generate_quiz,
        inputs=[category_input, num_categories_input, num_questions_input],
        outputs=[
            question_area,
            questions_state,
            index_state,
            user_answers_state,
            question_area,
            answer_area,
            confidence_slider,
            submit_answer_btn,
            result_area,
            submit_category,
            loading_text,
            guides
        ]
    )


    # On "Submit Answer": record the answer and either advance to the next
    # question or show the results and analysis. The outputs line up
    # one-to-one with the 9-tuple returned by submit_answer.
    submit_answer_btn.click(
        submit_answer,
        inputs=[category_input, num_categories_input, num_questions_input, questions_state, index_state, answer_area, confidence_slider, user_answers_state],
        outputs=[question_area, index_state, submit_answer_btn, user_answers_state, result_area, confidence_slider, answer_area, analysis_area, guides]
    )

    # Second handler on the same click. It also targets result_area, so two
    # handlers write to that component per click.
    # NOTE(review): confirm the relative ordering of the two is acceptable.
    submit_answer_btn.click(display_results, inputs=[index_state, questions_state], outputs=[result_area])
    
# Launch the app. allowed_paths is deliberately not set: the UI serves no
# local files, and allowed_paths=["/"] would have permitted Gradio to serve
# any file on the host to any visitor. If a local asset is added later, allow
# only its own directory.
app.launch(share=False)