Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
# Importing libraries
|
2 |
import pandas as pd
|
3 |
import json
|
4 |
import gradio as gr
|
@@ -9,222 +8,116 @@ from tempfile import NamedTemporaryFile
|
|
9 |
from sentence_transformers import CrossEncoder
|
10 |
import numpy as np
|
11 |
from time import perf_counter
|
12 |
-
|
13 |
|
14 |
-
#
|
15 |
-
|
|
|
16 |
|
|
|
17 |
VECTOR_COLUMN_NAME = "vector"
|
18 |
TEXT_COLUMN_NAME = "text"
|
|
|
19 |
proj_dir = Path.cwd()
|
20 |
-
|
21 |
-
# Set up logging
|
22 |
-
import logging
|
23 |
-
logging.basicConfig(level=logging.INFO)
|
24 |
-
logger = logging.getLogger(__name__)
|
25 |
-
|
26 |
-
# Replace Mixtral client with Qwen Client
|
27 |
client = Client("Qwen/Qwen1.5-110B-Chat-demo")
|
28 |
|
29 |
-
|
30 |
-
|
31 |
|
32 |
-
#
|
33 |
RAG_db = gr.State()
|
34 |
quiz_data = None
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
-
#defining a function to convert json file to excel file
|
38 |
def json_to_excel(output_json):
|
39 |
-
# Initialize list for DataFrame
|
40 |
data = []
|
41 |
-
|
42 |
-
|
43 |
-
question_key = f"Q{i}"
|
44 |
-
answer_key = f"A{i}"
|
45 |
-
|
46 |
question = output_json.get(question_key, '')
|
47 |
correct_answer_key = output_json.get(answer_key, '')
|
48 |
-
#correct_answer = correct_answer_key.split(':')[-1] if correct_answer_key else ''
|
49 |
correct_answer = correct_answer_key.split(':')[-1].replace('C', '').strip() if correct_answer_key else ''
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
# Add data row
|
56 |
-
data.append([
|
57 |
-
question, # Question Text
|
58 |
-
"Multiple Choice", # Question Type
|
59 |
-
options[0], # Option 1
|
60 |
-
options[1], # Option 2
|
61 |
-
options[2] if len(options) > 2 else '', # Option 3
|
62 |
-
options[3] if len(options) > 3 else '', # Option 4
|
63 |
-
options[4] if len(options) > 4 else '', # Option 5
|
64 |
-
correct_answer, # Correct Answer
|
65 |
-
30, # Time in seconds
|
66 |
-
'' # Image Link
|
67 |
-
])
|
68 |
-
|
69 |
-
# Create DataFrame
|
70 |
-
df = pd.DataFrame(data, columns=[
|
71 |
-
"Question Text",
|
72 |
-
"Question Type",
|
73 |
-
"Option 1",
|
74 |
-
"Option 2",
|
75 |
-
"Option 3",
|
76 |
-
"Option 4",
|
77 |
-
"Option 5",
|
78 |
-
"Correct Answer",
|
79 |
-
"Time in seconds",
|
80 |
-
"Image Link"
|
81 |
-
])
|
82 |
-
|
83 |
temp_file = NamedTemporaryFile(delete=False, suffix=".xlsx")
|
84 |
df.to_excel(temp_file.name, index=False)
|
85 |
return temp_file.name
|
86 |
-
# Define a colorful theme
|
87 |
-
colorful_theme = gr.themes.Default(
|
88 |
-
primary_hue="cyan", # Set a bright cyan as primary color
|
89 |
-
secondary_hue="yellow", # Set a bright magenta as secondary color
|
90 |
-
neutral_hue="purple" # Optionally set a neutral color
|
91 |
-
|
92 |
-
)
|
93 |
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
with gr.Row():
|
100 |
with gr.Column(scale=2):
|
101 |
gr.Image(value='logo.png', height=200, width=200)
|
102 |
with gr.Column(scale=6):
|
103 |
gr.HTML("""
|
104 |
<center>
|
105 |
-
<h1><span style="color: purple;">GOVERNMENT HIGH SCHOOL,SUTHUKENY</span> STUDENTS QUIZBOT</h1>
|
106 |
<h2>Generative AI-powered Capacity building for STUDENTS</h2>
|
107 |
-
<i>⚠️STUDENTS CAN CREATE QUIZ AND EVALUATE BY THEMSELVES
|
108 |
</center>
|
109 |
""")
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
|
114 |
-
topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
value='(ACCURATE) BGE reranker', label="Embeddings",
|
120 |
-
info="First query to ColBERT may take a little time")
|
121 |
-
|
122 |
-
generate_quiz_btn = gr.Button("Generate Quiz!🚀")
|
123 |
quiz_msg = gr.Textbox()
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
def generate_quiz(question_difficulty, topic, cross_encoder):
|
129 |
-
top_k_rank = 10
|
130 |
-
documents = []
|
131 |
-
gr.Warning('Generating Quiz may take 1-2 minutes. Please wait.', duration=60)
|
132 |
-
|
133 |
-
if cross_encoder == '(HIGH ACCURATE) ColBERT':
|
134 |
-
gr.Warning('Retrieving using ColBERT.. First-time query will take 2 minute for model to load.. please wait',duration=100)
|
135 |
-
RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
|
136 |
-
RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
|
137 |
-
documents_full = RAG_db.value.search(topic, k=top_k_rank)
|
138 |
-
documents = [item['content'] for item in documents_full]
|
139 |
-
|
140 |
-
else:
|
141 |
-
document_start = perf_counter()
|
142 |
-
query_vec = retriever.encode(topic)
|
143 |
-
doc1 = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank)
|
144 |
-
|
145 |
-
documents = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank).to_list()
|
146 |
-
documents = [doc[TEXT_COLUMN_NAME] for doc in documents]
|
147 |
-
|
148 |
-
query_doc_pair = [[topic, doc] for doc in documents]
|
149 |
-
|
150 |
-
# if cross_encoder == '(FAST) MiniLM-L6v2':
|
151 |
-
# cross_encoder1 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
|
152 |
-
if cross_encoder == '(ACCURATE) BGE reranker':
|
153 |
-
cross_encoder1 = CrossEncoder('BAAI/bge-reranker-base')
|
154 |
-
|
155 |
-
cross_scores = cross_encoder1.predict(query_doc_pair)
|
156 |
-
sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
|
157 |
-
documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]
|
158 |
-
|
159 |
-
#creating a text prompt to Qwen model combining the documents and system instruction
|
160 |
-
formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))
|
161 |
-
print(' Formatted Prompt : ' ,formatted_prompt)
|
162 |
-
try:
|
163 |
-
response = client.predict(query=formatted_prompt, history=[], system="You are a helpful assistant.", api_name="/model_chat")
|
164 |
-
response1 = response[1][0][1]
|
165 |
-
|
166 |
-
# Extract JSON
|
167 |
-
start_index = response1.find('{')
|
168 |
-
end_index = response1.rfind('}')
|
169 |
-
cleaned_response = response1[start_index:end_index + 1] if start_index != -1 and end_index != -1 else ''
|
170 |
-
print('Cleaned Response :',cleaned_response)
|
171 |
-
output_json = json.loads(cleaned_response)
|
172 |
-
# Assign the extracted JSON to quiz_data for use in the comparison function
|
173 |
-
global quiz_data
|
174 |
-
quiz_data = output_json
|
175 |
-
# Generate the Excel file
|
176 |
-
excel_file = json_to_excel(output_json)
|
177 |
-
|
178 |
-
|
179 |
-
#Create a Quiz display in app
|
180 |
-
question_radio_list = []
|
181 |
-
for question_num in range(1, 11):
|
182 |
-
question_key = f"Q{question_num}"
|
183 |
-
answer_key = f"A{question_num}"
|
184 |
-
|
185 |
-
question = output_json.get(question_key)
|
186 |
-
answer = output_json.get(output_json.get(answer_key))
|
187 |
-
|
188 |
-
if not question or not answer:
|
189 |
-
continue
|
190 |
-
|
191 |
-
choice_keys = [f"{question_key}:C{i}" for i in range(1, 5)]
|
192 |
-
choice_list = [output_json.get(choice_key, "Choice not found") for choice_key in choice_keys]
|
193 |
-
|
194 |
-
radio = gr.Radio(choices=choice_list, label=question, visible=True, interactive=True)
|
195 |
-
question_radio_list.append(radio)
|
196 |
-
|
197 |
-
return ['Quiz Generated!'] + question_radio_list + [excel_file]
|
198 |
-
|
199 |
-
except json.JSONDecodeError as e:
|
200 |
-
print(f"Failed to decode JSON: {e}")
|
201 |
-
|
202 |
check_button = gr.Button("Check Score")
|
203 |
score_textbox = gr.Markdown()
|
204 |
-
|
205 |
-
@check_button.click(inputs=question_radios, outputs=score_textbox)
|
206 |
-
def compare_answers(*user_answers):
|
207 |
-
user_answer_list = list(user_answers)
|
208 |
-
answers_list = []
|
209 |
-
|
210 |
-
for question_num in range(1, 20):
|
211 |
-
answer_key = f"A{question_num}"
|
212 |
-
answer = quiz_data.get(quiz_data.get(answer_key))
|
213 |
-
if not answer:
|
214 |
-
break
|
215 |
-
answers_list.append(answer)
|
216 |
-
|
217 |
-
score = sum(1 for item in user_answer_list if item in answers_list)
|
218 |
-
|
219 |
-
if score > 7:
|
220 |
-
message = f"### Excellent! You got {score} out of 10!"
|
221 |
-
elif score > 5:
|
222 |
-
message = f"### Good! You got {score} out of 10!"
|
223 |
-
else:
|
224 |
-
message = f"### You got {score} out of 10! Don't worry. You can prepare well and try better next time!"
|
225 |
-
|
226 |
-
return message
|
227 |
|
228 |
QUIZBOT.queue()
|
229 |
QUIZBOT.launch(debug=True)
|
230 |
-
|
|
|
|
|
1 |
import pandas as pd
|
2 |
import json
|
3 |
import gradio as gr
|
|
|
8 |
from sentence_transformers import CrossEncoder
|
9 |
import numpy as np
|
10 |
from time import perf_counter
|
11 |
+
import logging
|
12 |
|
13 |
+
# Setup logging
|
14 |
+
logging.basicConfig(level=logging.INFO)
|
15 |
+
logger = logging.getLogger(__name__)
|
16 |
|
17 |
+
# Constants
|
18 |
VECTOR_COLUMN_NAME = "vector"
|
19 |
TEXT_COLUMN_NAME = "text"
|
20 |
+
QUIZ_QUESTIONS = 10
|
21 |
proj_dir = Path.cwd()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
client = Client("Qwen/Qwen1.5-110B-Chat-demo")
|
23 |
|
24 |
+
# Import external retrieval functions
|
25 |
+
from backend.semantic_search import table, retriever
|
26 |
|
27 |
+
# RAG Database for ColBERT retrieval
|
28 |
RAG_db = gr.State()
|
29 |
quiz_data = None
|
30 |
|
31 |
+
def system_instructions(question_difficulty, topic, documents_str):
    """Build the quiz-generation prompt that is sent to the chat model.

    Args:
        question_difficulty: Difficulty label interpolated into the prompt.
        topic: Quiz topic; it is quoted inside the prompt.
        documents_str: Context text the questions must be drawn from.

    Returns:
        The prompt string. It asks for ``QUIZ_QUESTIONS`` four-choice
        questions in a flat JSON layout keyed ``Q#``, ``Q#:C1``..``Q#:C4``
        and ``A#`` (whose value names the correct choice key).
    """
    # Literal braces in the example are doubled because this is an f-string.
    prompt = f"""
<s> [INST] You are a great teacher and your task is to create {QUIZ_QUESTIONS} questions with 4 choices each,
with {question_difficulty} difficulty about the topic "{topic}" only from the given documents:
{documents_str}. Provide output in JSON format as follows:
"Q#":"", "Q#:C1":"", "Q#:C2":"", "Q#:C3":"", "Q#:C4":"", "A#":"Q#:C#"
Example: {{ "A10":"Q10:C3" }} [/INST]"""
    return prompt
|
38 |
|
|
|
39 |
def json_to_excel(output_json):
    """Write the generated quiz to a temporary ``.xlsx`` file.

    One row is emitted for each of the ``QUIZ_QUESTIONS`` question slots,
    whether or not the model actually produced that question; missing
    fields become empty strings.

    Args:
        output_json: Flat quiz dict keyed ``Q#``, ``Q#:C1``..``Q#:C4`` and
            ``A#`` (where ``A#`` is a reference such as ``"Q3:C2"``).

    Returns:
        Path of the spreadsheet. The file is created with ``delete=False``,
        so the caller owns its lifetime.
    """
    rows = []
    for i in range(1, QUIZ_QUESTIONS + 1):
        question_key, answer_key = f"Q{i}", f"A{i}"
        question = output_json.get(question_key, '')

        # "A#" holds a reference like "Q3:C2"; keep only the option number.
        # str() / `or ''` guard against the model emitting null or a bare
        # number here, which previously crashed on `.split`.
        answer_ref = output_json.get(answer_key) or ''
        correct_answer = str(answer_ref).split(':')[-1].replace('C', '').strip()

        options = [output_json.get(f"{question_key}:C{j}", '') for j in range(1, 5)]
        rows.append([question, "Multiple Choice", *options, correct_answer, 30, ''])

    df = pd.DataFrame(
        rows,
        columns=[
            "Question Text",
            "Question Type",
            "Option 1",
            "Option 2",
            "Option 3",
            "Option 4",
            "Correct Answer",
            "Time in seconds",
            "Image Link",
        ],
    )

    # Reserve a unique path, then close the handle *before* pandas reopens
    # it: the descriptor is no longer leaked, and writing to a still-open
    # NamedTemporaryFile by name is not portable across platforms.
    with NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        excel_path = temp_file.name
    df.to_excel(excel_path, index=False)
    return excel_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
+
# Loaded rerankers keyed by model name, so the weights are read once per
# process instead of once per request.
_RERANKER_CACHE = {}


def _get_reranker(model_name):
    """Return the cached CrossEncoder for ``model_name``, loading it on first use."""
    if model_name not in _RERANKER_CACHE:
        _RERANKER_CACHE[model_name] = CrossEncoder(model_name)
    return _RERANKER_CACHE[model_name]


def retrieve_documents(topic, cross_encoder):
    """Fetch up to 10 context passages for ``topic``.

    Args:
        topic: Free-text query.
        cross_encoder: Retrieval mode chosen in the UI.
            ``'(HIGH ACCURATE) ColBERT'`` searches the on-disk ColBERT index;
            any other value runs the vector-table search, and
            ``'(ACCURATE) BGE reranker'`` additionally reorders those hits
            with a cross-encoder.

    Returns:
        List of passage strings, best match first. May be empty.
    """
    top_k_rank = 10

    if cross_encoder == '(HIGH ACCURATE) ColBERT':
        # NOTE(review): the model and index are reloaded on every call;
        # left as-is because RAG_db.value is assigned as a side effect.
        RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
        RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
        documents_full = RAG_db.value.search(topic, k=top_k_rank)
        return [item['content'] for item in documents_full]

    query_vec = retriever.encode(topic)
    doc_results = (
        table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME)
        .limit(top_k_rank)
        .to_list()
    )
    documents = [doc[TEXT_COLUMN_NAME] for doc in doc_results]

    # Skip reranking when the search came back empty: there is nothing to
    # score, and the cross-encoder is not guaranteed to accept an empty batch.
    if documents and cross_encoder == '(ACCURATE) BGE reranker':
        model = _get_reranker('BAAI/bge-reranker-base')
        scores = model.predict([[topic, doc] for doc in documents])
        # argsort is ascending; reverse it so the highest score comes first.
        documents = [documents[idx] for idx in np.argsort(scores)[::-1][:top_k_rank]]

    return documents
|
74 |
+
|
75 |
+
def _quiz_error_outputs(message):
    """Return one value per wired output component for a failed generation."""
    hidden_radios = [gr.Radio(visible=False) for _ in range(QUIZ_QUESTIONS)]
    # Layout mirrors the success path: status text, the radios, the file slot.
    return [message] + hidden_radios + [None]


def generate_quiz(question_difficulty, topic, cross_encoder):
    """Generate a quiz for ``topic`` and return the UI updates for it.

    Retrieves context passages, prompts the chat model, parses the JSON
    object out of its reply, stores it in the module-level ``quiz_data``
    (read later by ``compare_answers``) and builds the output list.

    Args:
        question_difficulty: Difficulty label passed into the prompt.
        topic: Quiz topic entered by the user.
        cross_encoder: Retrieval mode, forwarded to ``retrieve_documents``.

    Returns:
        A list of ``1 + QUIZ_QUESTIONS + 1`` values: a status message, one
        ``gr.Radio`` per question slot, and the Excel file path (``None``
        on failure). The length is the same on success and on error so it
        always matches the outputs the click handler is wired to.
    """
    global quiz_data

    documents = retrieve_documents(topic, cross_encoder)
    formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))

    try:
        # The reply text is read from position [1][0][1] of the result.
        response = client.predict(
            query=formatted_prompt,
            history=[],
            system="You are a helpful assistant.",
            api_name="/model_chat",
        )[1][0][1]
        start, end = response.find('{'), response.rfind('}')
        if start == -1 or end < start:
            raise ValueError("no JSON object found in model response")
        output_json = json.loads(response[start:end + 1])
    except (ValueError, IndexError, KeyError, TypeError, AttributeError) as e:
        # json.JSONDecodeError is a ValueError; the other types cover a
        # reply that does not have the expected nested shape.
        logger.error("Failed to parse quiz from model response: %s", e)
        return _quiz_error_outputs("Error generating quiz")

    quiz_data = output_json

    radios = []
    for i in range(1, QUIZ_QUESTIONS + 1):
        question = output_json.get(f"Q{i}")
        choices = [output_json.get(f"Q{i}:C{j}", "") for j in range(1, 5)]
        # Keep the slot hidden when the model produced fewer questions,
        # rather than showing an unlabeled radio with blank choices.
        radios.append(gr.Radio(choices=choices, label=question, visible=bool(question)))

    return ['Quiz Generated!'] + radios + [json_to_excel(output_json)]
|
88 |
+
|
89 |
+
def compare_answers(*user_answers):
    """Score the user's selections against the last generated quiz.

    Args:
        *user_answers: Selected choice text for each question, in question
            order; ``None`` for a question left unanswered.

    Returns:
        A Markdown heading with a verdict and the score out of
        ``QUIZ_QUESTIONS``, or a prompt to generate a quiz first when no
        quiz data is available yet.
    """
    # quiz_data stays None until a quiz is generated; without this guard,
    # clicking "Check Score" first raised AttributeError.
    if not quiz_data:
        return "### Please generate a quiz before checking your score."

    score = 0
    for i, answer in enumerate(user_answers, start=1):
        if answer is None:
            continue
        # "A#" stores the *key* of the correct choice (e.g. "Q3:C2").
        answer_ref = quiz_data.get(f"A{i}")
        if not isinstance(answer_ref, str):
            continue
        correct = quiz_data.get(answer_ref)
        # A missing reference must never count as correct, even if the
        # selected choice text happens to be empty.
        if correct is not None and answer == correct:
            score += 1

    if score > 7:
        verdict = 'Excellent!'
    elif score > 5:
        verdict = 'Good!'
    else:
        verdict = 'Keep Trying!'
    return f"### {verdict} You got {score} out of {QUIZ_QUESTIONS}!"
|
92 |
+
|
93 |
+
# Colour palette applied to the whole app.
colorful_theme = gr.themes.Default(
    primary_hue="cyan",
    secondary_hue="yellow",
    neutral_hue="purple",
)

with gr.Blocks(title="Quiz Maker", theme=colorful_theme) as QUIZBOT:
    # Header row: logo beside the title banner.
    with gr.Row():
        with gr.Column(scale=2):
            gr.Image(value='logo.png', height=200, width=200)
        with gr.Column(scale=6):
            gr.HTML("""
<center>
<h1><span style="color: purple;">GOVERNMENT HIGH SCHOOL, SUTHUKENY</span> STUDENTS QUIZBOT</h1>
<h2>Generative AI-powered Capacity building for STUDENTS</h2>
<i>⚠️ STUDENTS CAN CREATE QUIZ AND EVALUATE BY THEMSELVES! ⚠️</i>
</center>
""")

    # Quiz request inputs.
    topic = gr.Textbox(
        label="Enter the Topic for Quiz",
        placeholder="Write any topic from Class 10 CBSE",
    )
    difficulty_radio = gr.Radio(
        ["easy", "average", "hard"],
        label="Select Quiz Difficulty",
    )
    model_radio = gr.Radio(
        ["(ACCURATE) BGE reranker", "(HIGH ACCURATE) ColBERT"],
        value="(ACCURATE) BGE reranker",
        label="Embeddings Model",
    )

    generate_quiz_btn = gr.Button("Generate Quiz! 🚀")
    quiz_msg = gr.Textbox()

    # One hidden radio per question slot; generate_quiz fills them in.
    question_radios = [gr.Radio(visible=False) for _ in range(QUIZ_QUESTIONS)]

    # Created here so it is placed after the radios, as in the original
    # where it was constructed inline in the outputs list.
    excel_download = gr.File(label="Download Excel")
    generate_quiz_btn.click(
        fn=generate_quiz,
        inputs=[difficulty_radio, topic, model_radio],
        outputs=[quiz_msg, *question_radios, excel_download],
    )

    check_button = gr.Button("Check Score")
    score_textbox = gr.Markdown()
    check_button.click(
        fn=compare_answers,
        inputs=question_radios,
        outputs=score_textbox,
    )

QUIZBOT.queue()
QUIZBOT.launch(debug=True)
|
|