Spaces:
Sleeping
Sleeping
import pandas as pd | |
import json | |
import gradio as gr | |
from pathlib import Path | |
from ragatouille import RAGPretrainedModel | |
from gradio_client import Client | |
from tempfile import NamedTemporaryFile | |
from sentence_transformers import CrossEncoder | |
import numpy as np | |
from time import perf_counter | |
import logging | |
# Logging: configure the root logger at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Number of questions requested per quiz; also the number of answer radios.
QUIZ_QUESTIONS = 10
# Column names used when querying the vector table and reading its rows.
TEXT_COLUMN_NAME = "text"
VECTOR_COLUMN_NAME = "vector"

# Working directory at start-up.
proj_dir = Path.cwd()

# Client for the hosted chat demo that generates the quiz JSON.
client = Client("Qwen/Qwen1.5-110B-Chat-demo")

# Vector table and query encoder supplied by the project's backend package.
from backend.semantic_search import table, retriever

# Most recently generated quiz (parsed JSON dict); None until one is generated.
quiz_data = None
# Holder whose ``.value`` stores the ColBERT index model once it is loaded.
RAG_db = gr.State()
def system_instructions(question_difficulty, topic, documents_str):
    """Build the instruction prompt sent to the chat model.

    Args:
        question_difficulty: Difficulty label interpolated into the prompt.
        topic: Quiz topic interpolated into the prompt.
        documents_str: Source text the model is told to draw questions from.

    Returns:
        The prompt string. It asks for ``QUIZ_QUESTIONS`` four-choice
        questions and describes the flat JSON key scheme
        (``Q#``, ``Q#:C1``..``Q#:C4``, ``A#``) expected in the reply.
    """
    prompt = f"""
<s> [INST] You are a great teacher and your task is to create {QUIZ_QUESTIONS} questions with 4 choices each,
with {question_difficulty} difficulty about the topic "{topic}" only from the given documents:
{documents_str}. Provide output in JSON format as follows:
"Q#":"", "Q#:C1":"", "Q#:C2":"", "Q#:C3":"", "Q#:C4":"", "A#":"Q#:C#"
Example: {{ "A10":"Q10:C3" }} [/INST]"""
    return prompt
def json_to_excel(output_json):
    """Write the generated quiz to a temporary ``.xlsx`` file.

    One row is produced per question number from 1 to ``QUIZ_QUESTIONS``;
    keys missing from ``output_json`` become empty cells.

    Args:
        output_json: Dict using the flat key scheme ``Q#`` (question text),
            ``Q#:C1``..``Q#:C4`` (options) and ``A#`` (a reference such as
            ``"Q3:C2"`` naming the correct option).

    Returns:
        Path of the written spreadsheet. The file is not deleted
        automatically; the caller owns it.
    """
    columns = [
        "Question Text", "Question Type",
        "Option 1", "Option 2", "Option 3", "Option 4",
        "Correct Answer", "Time in seconds", "Image Link",
    ]
    rows = []
    for i in range(1, QUIZ_QUESTIONS + 1):
        question_key = f"Q{i}"
        answer_ref = output_json.get(f"A{i}", '')
        # A reference like "Q3:C2" is reduced to the option number "2".
        # str() guards against the model emitting a non-string value here.
        correct_answer = str(answer_ref).split(':')[-1].replace('C', '').strip() if answer_ref else ''
        options = [output_json.get(f"{question_key}:C{j}", '') for j in range(1, 5)]
        rows.append([output_json.get(question_key, ''), "Multiple Choice", *options, correct_answer, 30, ''])
    df = pd.DataFrame(rows, columns=columns)
    # Reserve a unique path, then close the handle before pandas opens the
    # file: leaving it open leaks a descriptor per quiz and prevents the
    # write on platforms that do not allow reopening an open temp file.
    with NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        excel_path = temp_file.name
    df.to_excel(excel_path, index=False)
    return excel_path
# CrossEncoder rerankers that have already been loaded, keyed by model name,
# so the weights are loaded once per process instead of once per request.
_RERANKER_CACHE = {}


def _get_reranker(model_name):
    """Return the CrossEncoder for ``model_name``, loading it on first use."""
    if model_name not in _RERANKER_CACHE:
        _RERANKER_CACHE[model_name] = CrossEncoder(model_name)
    return _RERANKER_CACHE[model_name]


def retrieve_documents(topic, cross_encoder):
    """Return up to 10 document texts relevant to ``topic``.

    Args:
        topic: Free-text query.
        cross_encoder: Retrieval mode label from the UI.
            ``'(HIGH ACCURATE) ColBERT'`` searches the on-disk ColBERT index.
            Any other value runs a vector search on ``table``, and
            ``'(ACCURATE) BGE reranker'`` additionally reorders those hits by
            cross-encoder score (highest first).

    Returns:
        List of document text strings; may be empty.
    """
    top_k_rank = 10
    if cross_encoder == '(HIGH ACCURATE) ColBERT':
        # Load the index once and keep it in RAG_db.value for later calls.
        # from_index is called directly: the original built a model with
        # from_pretrained first and then discarded it in favour of the
        # from_index result, paying for a second model load on every request.
        if getattr(RAG_db, "value", None) is None:
            RAG_db.value = RAGPretrainedModel.from_index('.ragatouille/colbert/indexes/cbseclass10index')
        documents_full = RAG_db.value.search(topic, k=top_k_rank)
        return [item['content'] for item in documents_full]

    query_vec = retriever.encode(topic)
    doc_results = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank).to_list()
    documents = [doc[TEXT_COLUMN_NAME] for doc in doc_results]
    # Skip reranking when nothing was retrieved; there are no pairs to score.
    if documents and cross_encoder == '(ACCURATE) BGE reranker':
        model = _get_reranker('BAAI/bge-reranker-base')
        scores = model.predict([[topic, doc] for doc in documents])
        # argsort is ascending, so reverse it to put the best match first.
        documents = [documents[idx] for idx in np.argsort(scores)[::-1][:top_k_rank]]
    return documents
def _quiz_failure_outputs(message):
    """Return one value per wired output for a failed generation.

    The Generate button is wired to a status textbox, ``QUIZ_QUESTIONS``
    radios and a file component, so the handler must always return that many
    values: the message, hidden radios, and no file.
    """
    return [message] + [gr.Radio(visible=False) for _ in range(QUIZ_QUESTIONS)] + [None]


def generate_quiz(question_difficulty, topic, cross_encoder):
    """Generate a quiz for ``topic`` and return values for the UI outputs.

    Retrieves supporting documents, asks the chat model for quiz JSON, stores
    the parsed quiz in the module-level ``quiz_data`` and exports it to Excel.

    Args:
        question_difficulty: Difficulty label placed in the prompt.
        topic: Quiz topic; blank input is rejected with a status message.
        cross_encoder: Retrieval mode label passed to ``retrieve_documents``.

    Returns:
        A list of ``1 + QUIZ_QUESTIONS + 1`` items: status message, one
        ``gr.Radio`` update per question, and the Excel file path (``None``
        on failure).
    """
    # NOTE(review): quiz_data is a single module-level value, so concurrent
    # users overwrite each other's quiz. Per-session state would need a
    # change to the UI wiring as well.
    global quiz_data

    if not topic or not topic.strip():
        return _quiz_failure_outputs("Please enter a topic")

    documents = retrieve_documents(topic, cross_encoder)
    formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))
    result = client.predict(query=formatted_prompt, history=[], system="You are a helpful assistant.", api_name="/model_chat")
    try:
        # presumably result[1] is the chat history and [0][1] the assistant
        # reply of the first turn; verify against the endpoint's API page.
        response = result[1][0][1]
        # Keep only the outermost {...} span; the model may wrap the JSON in prose.
        output_json = json.loads(response[response.find('{'):response.rfind('}') + 1])
    except json.JSONDecodeError as e:
        logger.error(f"Failed to decode JSON: {e}")
        return _quiz_failure_outputs("Error generating quiz")
    except (IndexError, KeyError, TypeError, AttributeError) as e:
        logger.error(f"Unexpected response shape from chat model: {e}")
        return _quiz_failure_outputs("Error generating quiz")
    if not isinstance(output_json, dict):
        logger.error("Chat model returned JSON that is not an object")
        return _quiz_failure_outputs("Error generating quiz")

    quiz_data = output_json
    question_updates = [
        gr.Radio(
            choices=[output_json.get(f"Q{i}:C{j}", "") for j in range(1, 5)],
            label=output_json.get(f"Q{i}"),
            visible=True,
        )
        for i in range(1, QUIZ_QUESTIONS + 1)
    ]
    return ['Quiz Generated!'] + question_updates + [json_to_excel(output_json)]
def compare_answers(*user_answers):
    """Score the user's selections against the stored quiz.

    Args:
        *user_answers: Selected option text for questions 1..N in order;
            ``None`` for a question left unanswered.

    Returns:
        A Markdown heading with a verdict and the score out of
        ``QUIZ_QUESTIONS``, or a prompt to generate a quiz first when no quiz
        is stored yet.
    """
    # quiz_data is None until a quiz has been generated; the original crashed here.
    if not quiz_data:
        return "### Please generate a quiz before checking your score."

    score = 0
    for i, answer in enumerate(user_answers, start=1):
        answer_ref = quiz_data.get(f"A{i}")
        # An unanswered question never scores, and a non-string reference
        # (missing, null, list) cannot name an option.
        if answer is None or not isinstance(answer_ref, str):
            continue
        # "A#" holds a reference such as "Q3:C2"; the value under that key is
        # the correct option's text, which is what the radio returns.
        if answer == quiz_data.get(answer_ref, ""):
            score += 1
    return f"### {'Excellent!' if score > 7 else 'Good!' if score > 5 else 'Keep Trying!'} You got {score} out of {QUIZ_QUESTIONS}!"
# Colour palette applied to the whole app.
colorful_theme = gr.themes.Default(primary_hue="cyan", secondary_hue="yellow", neutral_hue="purple")

# NOTE(review): the Row/Column nesting below is inferred from the statement
# order; confirm it against the deployed layout.
with gr.Blocks(title="Quiz Maker", theme=colorful_theme) as QUIZBOT:
    # Header row: logo on the left, title banner on the right.
    with gr.Row():
        with gr.Column(scale=2):
            gr.Image(value='logo.png', height=200, width=200)
        with gr.Column(scale=6):
            gr.HTML("""
<center>
<h1><span style="color: purple;">GOVERNMENT HIGH SCHOOL, SUTHUKENY</span> STUDENTS QUIZBOT</h1>
<h2>Generative AI-powered Capacity building for STUDENTS</h2>
<i>⚠️ STUDENTS CAN CREATE QUIZ AND EVALUATE BY THEMSELVES! ⚠️</i>
</center>
""")

    # Quiz request inputs.
    topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic from Class 10 CBSE")
    difficulty_radio = gr.Radio(["easy", "average", "hard"], label="Select Quiz Difficulty")
    model_radio = gr.Radio(
        ["(ACCURATE) BGE reranker", "(HIGH ACCURATE) ColBERT"],
        value="(ACCURATE) BGE reranker",
        label="Embeddings Model",
    )
    generate_quiz_btn = gr.Button("Generate Quiz! 🚀")

    # Outputs filled by generate_quiz: status, one radio per question, Excel file.
    quiz_msg = gr.Textbox()
    question_radios = [gr.Radio(visible=False) for _ in range(QUIZ_QUESTIONS)]
    excel_file = gr.File(label="Download Excel")
    generate_quiz_btn.click(
        fn=generate_quiz,
        inputs=[difficulty_radio, topic, model_radio],
        outputs=[quiz_msg, *question_radios, excel_file],
    )

    # Scoring: the selected radio values are passed positionally to compare_answers.
    check_button = gr.Button("Check Score")
    score_textbox = gr.Markdown()
    check_button.click(fn=compare_answers, inputs=question_radios, outputs=score_textbox)

QUIZBOT.queue()
QUIZBOT.launch(debug=True)