STUDENTS_QUIZBOT-AJ

Sleeping

App Files Community

Anupam251272 committed on Feb 11

Commit

82e8b6e

verified ·

1 Parent(s): 1cd87c0

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -181

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# Importing libraries
 import pandas as pd
 import json
 import gradio as gr
@@ -9,222 +8,116 @@ from tempfile import NamedTemporaryFile
 from sentence_transformers import CrossEncoder
 import numpy as np
 from time import perf_counter
-from sentence_transformers import CrossEncoder
-#calling functions from other files - to call the knowledge database tables (lancedb for accurate mode) for creating quiz
-from backend.semantic_search import table, retriever
 VECTOR_COLUMN_NAME = "vector"
 TEXT_COLUMN_NAME = "text"
 proj_dir = Path.cwd()
-# Set up logging
-import logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-# Replace Mixtral client with Qwen Client
 client = Client("Qwen/Qwen1.5-110B-Chat-demo")
-def system_instructions(question_difficulty, topic, documents_str):
-    return f"""<s> [INST] You are a great teacher and your task is to create 10 questions with 4 choices with {question_difficulty} difficulty about the topic request "{topic}" only from the below given documents, {documents_str}. Then create answers. Index in JSON format, the questions as "Q#":"" to "Q#":"", the four choices as "Q#:C1":"" to "Q#:C4":"", and the answers as "A#":"Q#:C#" to "A#":"Q#:C#". Example: 'A10':'Q10:C3' [/INST]"""
-# Ragatouille database for Colbert ie highly accurate mode
 RAG_db = gr.State()
 quiz_data = None
-#defining a function to convert json file to excel file
 def json_to_excel(output_json):
-    # Initialize list for DataFrame
     data = []
-    gr.Warning('Generating Shareable file link..', duration=30)
-    for i in range(1, 11):  # Assuming there are 10 questions
-        question_key = f"Q{i}"
-        answer_key = f"A{i}"
         question = output_json.get(question_key, '')
         correct_answer_key = output_json.get(answer_key, '')
-        #correct_answer = correct_answer_key.split(':')[-1] if correct_answer_key else ''
         correct_answer = correct_answer_key.split(':')[-1].replace('C', '').strip() if correct_answer_key else ''
-        # Extract options
-        option_keys = [f"{question_key}:C{i}" for i in range(1, 6)]
-        options = [output_json.get(key, '') for key in option_keys]
-        # Add data row
-        data.append([
-            question,                     # Question Text
-            "Multiple Choice",            # Question Type
-            options[0],                   # Option 1
-            options[1],                   # Option 2
-            options[2] if len(options) > 2 else '',  # Option 3
-            options[3] if len(options) > 3 else '',  # Option 4
-            options[4] if len(options) > 4 else '',  # Option 5
-            correct_answer,               # Correct Answer
-            30,                           # Time in seconds
-            ''                            # Image Link
-        ])
-    # Create DataFrame
-    df = pd.DataFrame(data, columns=[
-        "Question Text",
-        "Question Type",
-        "Option 1",
-        "Option 2",
-        "Option 3",
-        "Option 4",
-        "Option 5",
-        "Correct Answer",
-        "Time in seconds",
-        "Image Link"
-    ])
     temp_file = NamedTemporaryFile(delete=False, suffix=".xlsx")
     df.to_excel(temp_file.name, index=False)
     return temp_file.name
-# Define a colorful theme
-colorful_theme = gr.themes.Default(
-    primary_hue="cyan",      # Set a bright cyan as primary color
-    secondary_hue="yellow", # Set a bright magenta as secondary color
-    neutral_hue="purple"  # Optionally set a neutral color
-)
-#gradio app creation for a user interface
-with gr.Blocks(title="Quiz Maker", theme=colorful_theme) as QUIZBOT:
-    # Create a single row for the HTML and Image
     with gr.Row():
         with gr.Column(scale=2):
             gr.Image(value='logo.png', height=200, width=200)
         with gr.Column(scale=6):
             gr.HTML("""
             <center>
-                <h1><span style="color: purple;">GOVERNMENT HIGH SCHOOL,SUTHUKENY</span> STUDENTS QUIZBOT</h1>
                 <h2>Generative AI-powered Capacity building for STUDENTS</h2>
-                <i>⚠️STUDENTS CAN CREATE QUIZ AND EVALUATE BY THEMSELVES ! ⚠️</i>
             </center>
             """)
-    topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic/details from 10 social CBSE")
-    with gr.Row():
-        difficulty_radio = gr.Radio(["easy", "average", "hard"], label="How difficult should the quiz be?")
-        model_radio = gr.Radio(choices=[ '(ACCURATE) BGE reranker', '(HIGH ACCURATE) ColBERT'],
-                               value='(ACCURATE) BGE reranker', label="Embeddings",
-                               info="First query to ColBERT may take a little time")
-    generate_quiz_btn = gr.Button("Generate Quiz!🚀")
     quiz_msg = gr.Textbox()
-    question_radios = [gr.Radio(visible=False) for _ in range(10)]
-    @generate_quiz_btn.click(inputs=[difficulty_radio, topic, model_radio], outputs=[quiz_msg] + question_radios + [gr.File(label="Download Excel")])
-    def generate_quiz(question_difficulty, topic, cross_encoder):
-        top_k_rank = 10
-        documents = []
-        gr.Warning('Generating Quiz may take 1-2 minutes. Please wait.', duration=60)
-        if cross_encoder == '(HIGH ACCURATE) ColBERT':
-            gr.Warning('Retrieving using ColBERT.. First-time query will take 2 minute for model to load.. please wait',duration=100)
-            RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
-            RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
-            documents_full = RAG_db.value.search(topic, k=top_k_rank)
-            documents = [item['content'] for item in documents_full]
-        else:
-            document_start = perf_counter()
-            query_vec = retriever.encode(topic)
-            doc1 = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank)
-            documents = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank).to_list()
-            documents = [doc[TEXT_COLUMN_NAME] for doc in documents]
-            query_doc_pair = [[topic, doc] for doc in documents]
-            # if cross_encoder == '(FAST) MiniLM-L6v2':
-            #     cross_encoder1 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
-            if cross_encoder == '(ACCURATE) BGE reranker':
-                cross_encoder1 = CrossEncoder('BAAI/bge-reranker-base')
-            cross_scores = cross_encoder1.predict(query_doc_pair)
-            sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
-            documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]
-        #creating a text prompt to Qwen model combining the documents and system instruction
-        formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))
-        print('                      Formatted Prompt : ' ,formatted_prompt)
-        try:
-            response = client.predict(query=formatted_prompt, history=[], system="You are a helpful assistant.", api_name="/model_chat")
-            response1 = response[1][0][1]
-            # Extract JSON
-            start_index = response1.find('{')
-            end_index = response1.rfind('}')
-            cleaned_response = response1[start_index:end_index + 1] if start_index != -1 and end_index != -1 else ''
-            print('Cleaned Response :',cleaned_response)
-            output_json = json.loads(cleaned_response)
-            # Assign the extracted JSON to quiz_data for use in the comparison function
-            global quiz_data
-            quiz_data = output_json
-            # Generate the Excel file
-            excel_file = json_to_excel(output_json)
-            #Create a Quiz display in app
-            question_radio_list = []
-            for question_num in range(1, 11):
-                question_key = f"Q{question_num}"
-                answer_key = f"A{question_num}"
-                question = output_json.get(question_key)
-                answer = output_json.get(output_json.get(answer_key))
-                if not question or not answer:
-                    continue
-                choice_keys = [f"{question_key}:C{i}" for i in range(1, 5)]
-                choice_list = [output_json.get(choice_key, "Choice not found") for choice_key in choice_keys]
-                radio = gr.Radio(choices=choice_list, label=question, visible=True, interactive=True)
-                question_radio_list.append(radio)
-            return ['Quiz Generated!'] + question_radio_list + [excel_file]
-        except json.JSONDecodeError as e:
-            print(f"Failed to decode JSON: {e}")
     check_button = gr.Button("Check Score")
     score_textbox = gr.Markdown()
-    @check_button.click(inputs=question_radios, outputs=score_textbox)
-    def compare_answers(*user_answers):
-        user_answer_list = list(user_answers)
-        answers_list = []
-        for question_num in range(1, 20):
-            answer_key = f"A{question_num}"
-            answer = quiz_data.get(quiz_data.get(answer_key))
-            if not answer:
-                break
-            answers_list.append(answer)
-        score = sum(1 for item in user_answer_list if item in answers_list)
-        if score > 7:
-            message = f"### Excellent! You got {score} out of 10!"
-        elif score > 5:
-            message = f"### Good! You got {score} out of 10!"
-        else:
-            message = f"### You got {score} out of 10! Don't worry. You can prepare well and try better next time!"
-        return message
 QUIZBOT.queue()
 QUIZBOT.launch(debug=True)

 import pandas as pd
 import json
 import gradio as gr
 from sentence_transformers import CrossEncoder
 import numpy as np
 from time import perf_counter
+import logging
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Constants
 VECTOR_COLUMN_NAME = "vector"
 TEXT_COLUMN_NAME = "text"
+QUIZ_QUESTIONS = 10
 proj_dir = Path.cwd()
 client = Client("Qwen/Qwen1.5-110B-Chat-demo")
+# Import external retrieval functions
+from backend.semantic_search import table, retriever
+# RAG Database for ColBERT retrieval
 RAG_db = gr.State()
 quiz_data = None
+def system_instructions(question_difficulty, topic, documents_str):
+    return f"""
+    <s> [INST] You are a great teacher and your task is to create {QUIZ_QUESTIONS} questions with 4 choices each,
+    with {question_difficulty} difficulty about the topic "{topic}" only from the given documents:
+    {documents_str}. Provide output in JSON format as follows:
+    "Q#":"", "Q#:C1":"", "Q#:C2":"", "Q#:C3":"", "Q#:C4":"", "A#":"Q#:C#"
+    Example: {{ "A10":"Q10:C3" }} [/INST]"""
 def json_to_excel(output_json):
     data = []
+    for i in range(1, QUIZ_QUESTIONS + 1):
+        question_key, answer_key = f"Q{i}", f"A{i}"
         question = output_json.get(question_key, '')
         correct_answer_key = output_json.get(answer_key, '')
         correct_answer = correct_answer_key.split(':')[-1].replace('C', '').strip() if correct_answer_key else ''
+        options = [output_json.get(f"{question_key}:C{j}", '') for j in range(1, 5)]
+        data.append([question, "Multiple Choice", *options, correct_answer, 30, ''])
+    df = pd.DataFrame(data, columns=["Question Text", "Question Type", "Option 1", "Option 2", "Option 3", "Option 4", "Correct Answer", "Time in seconds", "Image Link"])
     temp_file = NamedTemporaryFile(delete=False, suffix=".xlsx")
     df.to_excel(temp_file.name, index=False)
     return temp_file.name
+def retrieve_documents(topic, cross_encoder):
+    top_k_rank = 10
+    documents = []
+    if cross_encoder == '(HIGH ACCURATE) ColBERT':
+        RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
+        RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
+        documents_full = RAG_db.value.search(topic, k=top_k_rank)
+        documents = [item['content'] for item in documents_full]
+    else:
+        query_vec = retriever.encode(topic)
+        doc_results = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank).to_list()
+        documents = [doc[TEXT_COLUMN_NAME] for doc in doc_results]
+        if cross_encoder == '(ACCURATE) BGE reranker':
+            model = CrossEncoder('BAAI/bge-reranker-base')
+            scores = model.predict([[topic, doc] for doc in documents])
+            documents = [documents[idx] for idx in np.argsort(scores)[::-1][:top_k_rank]]
+    return documents
+def generate_quiz(question_difficulty, topic, cross_encoder):
+    documents = retrieve_documents(topic, cross_encoder)
+    formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))
+    try:
+        response = client.predict(query=formatted_prompt, history=[], system="You are a helpful assistant.", api_name="/model_chat")[1][0][1]
+        output_json = json.loads(response[response.find('{'):response.rfind('}') + 1])
+        global quiz_data
+        quiz_data = output_json
+        return ['Quiz Generated!'] + [gr.Radio(choices=[output_json.get(f"Q{i}:C{j}", "") for j in range(1, 5)], label=output_json.get(f"Q{i}"), visible=True) for i in range(1, QUIZ_QUESTIONS + 1)] + [json_to_excel(output_json)]
+    except json.JSONDecodeError as e:
+        logger.error(f"Failed to decode JSON: {e}")
+        return ["Error generating quiz"]
+def compare_answers(*user_answers):
+    score = sum(1 for i, answer in enumerate(user_answers) if answer == quiz_data.get(quiz_data.get(f"A{i+1}"), ""))
+    return f"### {'Excellent!' if score > 7 else 'Good!' if score > 5 else 'Keep Trying!'} You got {score} out of {QUIZ_QUESTIONS}!"
+colorful_theme = gr.themes.Default(primary_hue="cyan", secondary_hue="yellow", neutral_hue="purple")
+with gr.Blocks(title="Quiz Maker", theme=colorful_theme) as QUIZBOT:
     with gr.Row():
         with gr.Column(scale=2):
             gr.Image(value='logo.png', height=200, width=200)
         with gr.Column(scale=6):
             gr.HTML("""
             <center>
+                <h1><span style="color: purple;">GOVERNMENT HIGH SCHOOL, SUTHUKENY</span> STUDENTS QUIZBOT</h1>
                 <h2>Generative AI-powered Capacity building for STUDENTS</h2>
+                <i>⚠️ STUDENTS CAN CREATE QUIZ AND EVALUATE BY THEMSELVES! ⚠️</i>
             </center>
             """)
+    topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic from Class 10 CBSE")
+    difficulty_radio = gr.Radio(["easy", "average", "hard"], label="Select Quiz Difficulty")
+    model_radio = gr.Radio(["(ACCURATE) BGE reranker", "(HIGH ACCURATE) ColBERT"], value="(ACCURATE) BGE reranker", label="Embeddings Model")
+    generate_quiz_btn = gr.Button("Generate Quiz! 🚀")
     quiz_msg = gr.Textbox()
+    question_radios = [gr.Radio(visible=False) for _ in range(QUIZ_QUESTIONS)]
+    generate_quiz_btn.click(inputs=[difficulty_radio, topic, model_radio], outputs=[quiz_msg] + question_radios + [gr.File(label="Download Excel")], fn=generate_quiz)
     check_button = gr.Button("Check Score")
     score_textbox = gr.Markdown()
+    check_button.click(inputs=question_radios, outputs=score_textbox, fn=compare_answers)
 QUIZBOT.queue()
 QUIZBOT.launch(debug=True)