File size: 5,783 Bytes
be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 82e8b6e be3f613 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import pandas as pd
import json
import gradio as gr
from pathlib import Path
from ragatouille import RAGPretrainedModel
from gradio_client import Client
from tempfile import NamedTemporaryFile
from sentence_transformers import CrossEncoder
import numpy as np
from time import perf_counter
import logging
# Setup logging: configure the root logger at INFO level and create this
# module's own logger (used by generate_quiz to report parse failures).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Constants
# Column names used when querying the vector table in retrieve_documents():
# VECTOR_COLUMN_NAME is passed to table.search(), TEXT_COLUMN_NAME is the key
# read from each returned row.
VECTOR_COLUMN_NAME = "vector"
TEXT_COLUMN_NAME = "text"
# Number of questions requested in the prompt, exported to Excel, and rendered
# as radio groups in the UI.
QUIZ_QUESTIONS = 10
# Working directory captured at import time (not referenced in the visible code).
proj_dir = Path.cwd()
# Gradio client for the hosted chat demo; constructed at import time and used
# by generate_quiz() through the "/model_chat" endpoint.
client = Client("Qwen/Qwen1.5-110B-Chat-demo")
# Import external retrieval functions: `table` is searched and `retriever`
# encodes the query in retrieve_documents().
from backend.semantic_search import table, retriever
# RAG Database for ColBERT retrieval: retrieve_documents() stores the loaded
# index on `RAG_db.value`.
# NOTE(review): created outside any gr.Blocks context, so this presumably acts
# as a plain module-level holder rather than per-session state - confirm.
RAG_db = gr.State()
# Most recently generated quiz as a parsed JSON dict; written by
# generate_quiz() and read by compare_answers(). None until a quiz exists.
quiz_data = None
def system_instructions(question_difficulty, topic, documents_str):
    """Build the instruction prompt sent to the chat model.

    Args:
        question_difficulty: Difficulty label interpolated into the prompt.
        topic: Quiz topic; placed in double quotes inside the prompt.
        documents_str: Retrieved passages, already joined into one string.

    Returns:
        The prompt text. It starts with a newline, wraps the instructions in
        ``<s> [INST] ... [/INST]`` markers, asks for ``QUIZ_QUESTIONS``
        four-choice questions, and spells out the expected JSON key layout.
    """
    # Each entry is one line of the prompt; the leading empty entry yields the
    # newline that the prompt begins with.
    prompt_lines = (
        "",
        f"<s> [INST] You are a great teacher and your task is to create {QUIZ_QUESTIONS} questions with 4 choices each,",
        f'with {question_difficulty} difficulty about the topic "{topic}" only from the given documents:',
        f"{documents_str}. Provide output in JSON format as follows:",
        '"Q#":"", "Q#:C1":"", "Q#:C2":"", "Q#:C3":"", "Q#:C4":"", "A#":"Q#:C#"',
        'Example: { "A10":"Q10:C3" } [/INST]',
    )
    return "\n".join(prompt_lines)
def json_to_excel(output_json):
    """Export the generated quiz to a temporary ``.xlsx`` file.

    Args:
        output_json: Mapping produced by the model with keys ``"Q<i>"``
            (question text), ``"Q<i>:C<j>"`` (choice j of question i, j in
            1..4) and ``"A<i>"`` (the correct choice, e.g. ``"Q3:C2"``).
            Missing keys are exported as empty cells.

    Returns:
        Path of the written Excel file. The file is created with
        ``delete=False`` so it outlives this call; the caller owns it.
    """
    columns = [
        "Question Text", "Question Type",
        "Option 1", "Option 2", "Option 3", "Option 4",
        "Correct Answer", "Time in seconds", "Image Link",
    ]
    rows = []
    for i in range(1, QUIZ_QUESTIONS + 1):
        question_key, answer_key = f"Q{i}", f"A{i}"
        question = output_json.get(question_key, '')
        correct_answer_key = output_json.get(answer_key, '')
        # "Q3:C2" -> "2": keep only the option number. str() guards against
        # the model emitting a non-string value for the answer.
        correct_answer = (
            str(correct_answer_key).split(':')[-1].replace('C', '').strip()
            if correct_answer_key else ''
        )
        options = [output_json.get(f"{question_key}:C{j}", '') for j in range(1, 5)]
        rows.append([question, "Multiple Choice", *options, correct_answer, 30, ''])
    df = pd.DataFrame(rows, columns=columns)
    # Reserve a unique path, then close the handle before pandas writes to it:
    # leaving it open leaks a file descriptor and prevents reopening the file
    # by name on Windows.
    with NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        excel_path = temp_file.name
    df.to_excel(excel_path, index=False)
    return excel_path
# Lazily constructed BGE reranker, shared across requests.
_bge_reranker = None


def _get_bge_reranker():
    """Return the BGE cross-encoder, constructing it on first use only."""
    global _bge_reranker
    if _bge_reranker is None:
        _bge_reranker = CrossEncoder('BAAI/bge-reranker-base')
    return _bge_reranker


def retrieve_documents(topic, cross_encoder):
    """Retrieve up to 10 passages relevant to ``topic``.

    Args:
        topic: Query text.
        cross_encoder: Retrieval mode chosen in the UI.
            ``'(HIGH ACCURATE) ColBERT'`` searches the on-disk ColBERT index.
            Any other value runs a vector search on ``table``;
            ``'(ACCURATE) BGE reranker'`` additionally reorders those hits by
            cross-encoder score, best first.

    Returns:
        A list of passage strings (possibly empty).
    """
    top_k_rank = 10
    if cross_encoder == '(HIGH ACCURATE) ColBERT':
        # Load the index once and keep it on RAG_db.value for later requests.
        # from_index() builds the model from the index by itself, so no
        # separate from_pretrained() call is needed beforehand.
        if getattr(RAG_db, "value", None) is None:
            RAG_db.value = RAGPretrainedModel.from_index('.ragatouille/colbert/indexes/cbseclass10index')
        documents_full = RAG_db.value.search(topic, k=top_k_rank)
        return [item['content'] for item in documents_full]

    query_vec = retriever.encode(topic)
    doc_results = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank).to_list()
    documents = [doc[TEXT_COLUMN_NAME] for doc in doc_results]
    # Skip reranking when the search returned nothing: there is nothing to
    # score, and the model should not be invoked on an empty batch.
    if documents and cross_encoder == '(ACCURATE) BGE reranker':
        scores = _get_bge_reranker().predict([[topic, doc] for doc in documents])
        # argsort is ascending; reverse it so the highest score comes first.
        best_first = np.argsort(scores)[::-1][:top_k_rank]
        documents = [documents[idx] for idx in best_first]
    return documents
def generate_quiz(question_difficulty, topic, cross_encoder):
    """Generate a quiz for ``topic`` and return updates for the UI outputs.

    Retrieves supporting passages, asks the chat model for a JSON quiz, stores
    the parsed quiz in the module-level ``quiz_data`` and exports it to Excel.

    Args:
        question_difficulty: Difficulty label forwarded to the prompt.
        topic: Quiz topic; also used as the retrieval query.
        cross_encoder: Retrieval mode forwarded to ``retrieve_documents``.

    Returns:
        A list of ``QUIZ_QUESTIONS + 2`` values: a status message, one
        ``gr.Radio`` per question, and the Excel file path. On a parse failure
        the list has the same length (error message, hidden radios, ``None``
        for the file), so it still matches the outputs wired to the click
        handler.
    """
    global quiz_data
    documents = retrieve_documents(topic, cross_encoder)
    formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))
    try:
        response = client.predict(query=formatted_prompt, history=[], system="You are a helpful assistant.", api_name="/model_chat")[1][0][1]
        # Keep only the outermost {...} span; the model may wrap the JSON in
        # extra prose.
        output_json = json.loads(response[response.find('{'):response.rfind('}') + 1])
    except (json.JSONDecodeError, IndexError, TypeError, AttributeError) as e:
        # JSONDecodeError: no valid JSON object in the reply. The others cover
        # a reply whose structure is not the expected [1][0][1] string.
        logger.error(f"Failed to decode JSON: {e}")
        hidden_radios = [gr.Radio(visible=False) for _ in range(QUIZ_QUESTIONS)]
        return ["Error generating quiz"] + hidden_radios + [None]

    quiz_data = output_json
    question_updates = [
        gr.Radio(
            choices=[output_json.get(f"Q{i}:C{j}", "") for j in range(1, 5)],
            label=output_json.get(f"Q{i}"),
            visible=True,
        )
        for i in range(1, QUIZ_QUESTIONS + 1)
    ]
    return ['Quiz Generated!'] + question_updates + [json_to_excel(output_json)]
def compare_answers(*user_answers):
    """Score the selected answers against the most recently generated quiz.

    Args:
        *user_answers: Selected choice text for each question, in question
            order (position 0 is question 1).

    Returns:
        A Markdown heading with a verdict and the score out of
        ``QUIZ_QUESTIONS``, or a prompt to generate a quiz first when no quiz
        has been stored in ``quiz_data`` yet.
    """
    # Guard: "Check Score" can be clicked before any quiz exists.
    if not quiz_data:
        return "### Please generate a quiz before checking your score."

    score = 0
    for i, answer in enumerate(user_answers, start=1):
        # "A<i>" names the key of the correct choice (e.g. "Q3:C2"); that key
        # in turn holds the choice text shown in the radio group.
        correct_key = quiz_data.get(f"A{i}")
        correct_text = quiz_data.get(correct_key, "") if isinstance(correct_key, str) else ""
        # Require a non-empty expected text so a missing answer key can never
        # match an empty choice.
        if correct_text and answer == correct_text:
            score += 1
    return f"### {'Excellent!' if score > 7 else 'Good!' if score > 5 else 'Keep Trying!'} You got {score} out of {QUIZ_QUESTIONS}!"
# Theme shared by the whole app.
colorful_theme = gr.themes.Default(primary_hue="cyan", secondary_hue="yellow", neutral_hue="purple")

# NOTE(review): the nesting of the layout contexts below is inferred from the
# Row/Column calls - confirm against the deployed app.
with gr.Blocks(title="Quiz Maker", theme=colorful_theme) as QUIZBOT:
    # Header row: logo on the left, title banner on the right.
    with gr.Row():
        with gr.Column(scale=2):
            gr.Image(value='logo.png', height=200, width=200)
        with gr.Column(scale=6):
            gr.HTML("""
<center>
<h1><span style="color: purple;">GOVERNMENT HIGH SCHOOL, SUTHUKENY</span> STUDENTS QUIZBOT</h1>
<h2>Generative AI-powered Capacity building for STUDENTS</h2>
<i>⚠️ STUDENTS CAN CREATE QUIZ AND EVALUATE BY THEMSELVES! ⚠️</i>
</center>
""")

    # Quiz parameters.
    topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic from Class 10 CBSE")
    difficulty_radio = gr.Radio(["easy", "average", "hard"], label="Select Quiz Difficulty")
    model_radio = gr.Radio(
        ["(ACCURATE) BGE reranker", "(HIGH ACCURATE) ColBERT"],
        value="(ACCURATE) BGE reranker",
        label="Embeddings Model",
    )

    # Generation: status message, one hidden radio group per question, and the
    # Excel download, all filled in by generate_quiz.
    generate_quiz_btn = gr.Button("Generate Quiz! 🚀")
    quiz_msg = gr.Textbox()
    question_radios = [gr.Radio(visible=False) for _ in range(QUIZ_QUESTIONS)]
    _excel_download = gr.File(label="Download Excel")
    _quiz_outputs = [quiz_msg, *question_radios, _excel_download]
    generate_quiz_btn.click(
        fn=generate_quiz,
        inputs=[difficulty_radio, topic, model_radio],
        outputs=_quiz_outputs,
    )

    # Scoring: compare the selected choices with the stored quiz.
    check_button = gr.Button("Check Score")
    score_textbox = gr.Markdown()
    check_button.click(fn=compare_answers, inputs=question_radios, outputs=score_textbox)

# Enable request queuing, then start the server in debug mode.
QUIZBOT.queue()
QUIZBOT.launch(debug=True)
|