File size: 5,783 Bytes
be3f613
 
 
 
 
 
 
 
 
 
82e8b6e
be3f613
82e8b6e
 
 
be3f613
82e8b6e
be3f613
 
82e8b6e
be3f613
 
 
82e8b6e
 
be3f613
82e8b6e
be3f613
 
 
82e8b6e
 
 
 
 
 
 
be3f613
 
 
82e8b6e
 
be3f613
 
 
82e8b6e
 
 
 
be3f613
 
 
 
82e8b6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be3f613
82e8b6e
 
 
 
 
be3f613
82e8b6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be3f613
 
 
 
 
 
82e8b6e
be3f613
82e8b6e
be3f613
 
 
82e8b6e
 
 
 
 
be3f613
82e8b6e
 
 
 
be3f613
 
82e8b6e
be3f613
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import pandas as pd
import json
import gradio as gr
from pathlib import Path
from ragatouille import RAGPretrainedModel
from gradio_client import Client
from tempfile import NamedTemporaryFile
from sentence_transformers import CrossEncoder
import numpy as np
from time import perf_counter
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Constants
# Name of the vector column passed to table.search() in retrieve_documents().
VECTOR_COLUMN_NAME = "vector"
# Key used to pull the passage text out of each search result row.
TEXT_COLUMN_NAME = "text"
# Number of questions requested in the prompt; also the number of radio
# groups built in the UI and the number of rows written to the Excel export.
QUIZ_QUESTIONS = 10
# Working directory at import time (not referenced elsewhere in the visible code).
proj_dir = Path.cwd()
# gradio_client handle used by generate_quiz() to call the "/model_chat"
# endpoint. NOTE(review): constructed at import time, so importing this
# module presumably needs network access to that Space - confirm.
client = Client("Qwen/Qwen1.5-110B-Chat-demo")

# Import external retrieval functions
from backend.semantic_search import table, retriever

# RAG Database for ColBERT retrieval
# retrieve_documents() assigns the loaded index object to RAG_db.value.
RAG_db = gr.State()
# Most recently generated quiz (parsed JSON dict). Written by generate_quiz()
# and read by compare_answers(); being a module-level global it is a single
# value for the whole process, not one per user.
quiz_data = None

def system_instructions(question_difficulty, topic, documents_str):
    """Build the quiz-generation prompt sent to the chat model.

    Args:
        question_difficulty: Difficulty label interpolated into the prompt.
        topic: Topic the questions must be about.
        documents_str: Source passages, already joined into a single string.

    Returns:
        The prompt text. It asks for ``QUIZ_QUESTIONS`` four-choice questions
        as JSON using the keys ``Q#``, ``Q#:C1`` .. ``Q#:C4`` and ``A#``
        (whose value has the form ``Q#:C#``).
    """
    # Doubled braces in the example render as literal { } in the f-string.
    prompt = f"""
    <s> [INST] You are a great teacher and your task is to create {QUIZ_QUESTIONS} questions with 4 choices each,
    with {question_difficulty} difficulty about the topic "{topic}" only from the given documents:
    {documents_str}. Provide output in JSON format as follows:
    "Q#":"", "Q#:C1":"", "Q#:C2":"", "Q#:C3":"", "Q#:C4":"", "A#":"Q#:C#"
    Example: {{ "A10":"Q10:C3" }} [/INST]"""
    return prompt

def json_to_excel(output_json):
    """Write the generated quiz to a temporary ``.xlsx`` file.

    Args:
        output_json: Parsed quiz dict with keys ``Q#``, ``Q#:C1`` ..
            ``Q#:C4`` and ``A#`` (value of the form ``"Q#:C#"``). Missing
            keys become empty cells.

    Returns:
        Path of the Excel file. The file is created with ``delete=False``,
        so it is not removed automatically.
    """
    rows = []
    for number in range(1, QUIZ_QUESTIONS + 1):
        question_key = f"Q{number}"
        # The answer arrives as "Q#:C#"; the sheet stores only the option
        # number, i.e. what follows the last ':' with the 'C' removed.
        # str() keeps a non-string answer (e.g. a bare int) from crashing.
        answer_ref = output_json.get(f"A{number}") or ''
        correct_option = str(answer_ref).split(':')[-1].replace('C', '').strip()
        options = [output_json.get(f"{question_key}:C{j}", '') for j in range(1, 5)]
        rows.append([
            output_json.get(question_key, ''),
            "Multiple Choice",
            *options,
            correct_option,
            30,
            '',
        ])

    df = pd.DataFrame(
        rows,
        columns=[
            "Question Text",
            "Question Type",
            "Option 1",
            "Option 2",
            "Option 3",
            "Option 4",
            "Correct Answer",
            "Time in seconds",
            "Image Link",
        ],
    )

    # Only the unique path is needed. Close our own handle before pandas
    # reopens the file by name: this avoids leaking a descriptor per quiz
    # and the reopen failure an already-open temp file causes on Windows.
    with NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        excel_path = temp_file.name
    df.to_excel(excel_path, index=False)
    return excel_path

def retrieve_documents(topic, cross_encoder):
    """Return up to 10 passages relevant to ``topic``.

    Args:
        topic: Query text.
        cross_encoder: Retrieval mode label from the UI.
            ``'(HIGH ACCURATE) ColBERT'`` searches the ColBERT index;
            any other value runs a vector search on ``table``, and
            ``'(ACCURATE) BGE reranker'`` additionally reorders those hits
            by cross-encoder score, highest first.

    Returns:
        List of passage texts.
    """
    top_k_rank = 10

    if cross_encoder == '(HIGH ACCURATE) ColBERT':
        # NOTE(review): the model and index are loaded again on every call;
        # caching them looks worthwhile - confirm before changing.
        RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
        RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
        hits = RAG_db.value.search(topic, k=top_k_rank)
        return [hit['content'] for hit in hits]

    # Dense retrieval: embed the query, then take the nearest rows.
    query_vec = retriever.encode(topic)
    search = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME)
    rows = search.limit(top_k_rank).to_list()
    documents = [row[TEXT_COLUMN_NAME] for row in rows]

    if cross_encoder != '(ACCURATE) BGE reranker':
        return documents

    # Rerank: score each (query, passage) pair and sort by descending score.
    model = CrossEncoder('BAAI/bge-reranker-base')
    pairs = [[topic, doc] for doc in documents]
    scores = model.predict(pairs)
    best_first = np.argsort(scores)[::-1][:top_k_rank]
    return [documents[idx] for idx in best_first]

def generate_quiz(question_difficulty, topic, cross_encoder):
    """Generate a quiz for ``topic`` and return updates for the UI outputs.

    Retrieves passages, prompts the remote chat model, parses the JSON
    object from its reply and stores it in the module-level ``quiz_data``
    for ``compare_answers`` to read.

    Args:
        question_difficulty: Difficulty label chosen in the UI.
        topic: Topic entered by the user.
        cross_encoder: Retrieval mode label passed to ``retrieve_documents``.

    Returns:
        A list of ``QUIZ_QUESTIONS + 2`` values in both the success and the
        failure case: a status message, one ``gr.Radio`` per question, and
        the Excel file path (``None`` on failure). The length must match
        the outputs wired to the click handler.
    """
    global quiz_data

    documents = retrieve_documents(topic, cross_encoder)
    formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))

    try:
        response = client.predict(query=formatted_prompt, history=[], system="You are a helpful assistant.", api_name="/model_chat")[1][0][1]
        # The model may wrap the JSON in prose; keep only the outermost {...}.
        output_json = json.loads(response[response.find('{'):response.rfind('}') + 1])
    except json.JSONDecodeError as e:
        logger.error("Failed to decode JSON: %s", e)
        # Return one value per wired output. A shorter list makes Gradio
        # raise instead of displaying this message.
        hidden_radios = [gr.Radio(visible=False) for _ in range(QUIZ_QUESTIONS)]
        return ["Error generating quiz"] + hidden_radios + [None]

    quiz_data = output_json
    question_radios = [
        gr.Radio(
            choices=[output_json.get(f"Q{i}:C{j}", "") for j in range(1, 5)],
            label=output_json.get(f"Q{i}"),
            visible=True,
        )
        for i in range(1, QUIZ_QUESTIONS + 1)
    ]
    return ['Quiz Generated!'] + question_radios + [json_to_excel(output_json)]

def compare_answers(*user_answers):
    """Score the selected answers against the most recently generated quiz.

    Args:
        *user_answers: Selected option text for each question, in question
            order; ``None`` for a question left unanswered.

    Returns:
        A Markdown string with the score, or a prompt to generate a quiz
        first when none is stored in ``quiz_data``.
    """
    # Checking the score before any quiz exists used to raise
    # AttributeError on None.get().
    if not quiz_data:
        return "### Please generate a quiz before checking your score!"

    score = 0
    for number, answer in enumerate(user_answers, start=1):
        if answer is None:
            continue
        # "A#" holds a reference such as "Q3:C2", which is itself the key
        # of the correct option's text.
        correct_ref = quiz_data.get(f"A{number}")
        if not isinstance(correct_ref, str):
            # A missing or malformed answer key must never count as correct.
            continue
        if correct_ref in quiz_data and answer == quiz_data[correct_ref]:
            score += 1

    return f"### {'Excellent!' if score > 7 else 'Good!' if score > 5 else 'Keep Trying!'} You got {score} out of {QUIZ_QUESTIONS}!"

# Colour palette applied to the whole Blocks app.
colorful_theme = gr.themes.Default(primary_hue="cyan", secondary_hue="yellow", neutral_hue="purple")

# UI definition. Components appear in the order they are created below, so
# statement order here is the page layout.
with gr.Blocks(title="Quiz Maker", theme=colorful_theme) as QUIZBOT:
    # Header row: logo on the left, title banner on the right.
    with gr.Row():
        with gr.Column(scale=2):
            # 'logo.png' is a relative path; presumably resolved against the
            # working directory - confirm.
            gr.Image(value='logo.png', height=200, width=200)
        with gr.Column(scale=6):
            gr.HTML("""
            <center>
                <h1><span style="color: purple;">GOVERNMENT HIGH SCHOOL, SUTHUKENY</span> STUDENTS QUIZBOT</h1>
                <h2>Generative AI-powered Capacity building for STUDENTS</h2>
                <i>⚠️ STUDENTS CAN CREATE QUIZ AND EVALUATE BY THEMSELVES! ⚠️</i>
            </center>
            """)
    
    # Quiz parameters; passed to generate_quiz as (difficulty, topic, model).
    topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic from Class 10 CBSE")
    difficulty_radio = gr.Radio(["easy", "average", "hard"], label="Select Quiz Difficulty")
    model_radio = gr.Radio(["(ACCURATE) BGE reranker", "(HIGH ACCURATE) ColBERT"], value="(ACCURATE) BGE reranker", label="Embeddings Model")
    
    generate_quiz_btn = gr.Button("Generate Quiz! 🚀")
    # Status message written by generate_quiz.
    quiz_msg = gr.Textbox()
    # One hidden placeholder per question; generate_quiz returns radios with
    # choices, label and visible=True to populate them.
    question_radios = [gr.Radio(visible=False) for _ in range(QUIZ_QUESTIONS)]
    
    # Outputs: status message, the question radios, then a download File
    # component that is created inline in this call.
    generate_quiz_btn.click(inputs=[difficulty_radio, topic, model_radio], outputs=[quiz_msg] + question_radios + [gr.File(label="Download Excel")], fn=generate_quiz)
    
    check_button = gr.Button("Check Score")
    # compare_answers returns a Markdown string rendered here.
    score_textbox = gr.Markdown()
    check_button.click(inputs=question_radios, outputs=score_textbox, fn=compare_answers)

# Runs at import time: enable the queue, then start the app in debug mode.
QUIZBOT.queue()
QUIZBOT.launch(debug=True)