Anupam251272 committed on
Commit
82e8b6e
·
verified ·
1 Parent(s): 1cd87c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -181
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # Importing libraries
2
  import pandas as pd
3
  import json
4
  import gradio as gr
@@ -9,222 +8,116 @@ from tempfile import NamedTemporaryFile
9
  from sentence_transformers import CrossEncoder
10
  import numpy as np
11
  from time import perf_counter
12
- from sentence_transformers import CrossEncoder
13
 
14
- #calling functions from other files - to call the knowledge database tables (lancedb for accurate mode) for creating quiz
15
- from backend.semantic_search import table, retriever
 
16
 
 
17
  VECTOR_COLUMN_NAME = "vector"
18
  TEXT_COLUMN_NAME = "text"
 
19
  proj_dir = Path.cwd()
20
-
21
- # Set up logging
22
- import logging
23
- logging.basicConfig(level=logging.INFO)
24
- logger = logging.getLogger(__name__)
25
-
26
- # Replace Mixtral client with Qwen Client
27
  client = Client("Qwen/Qwen1.5-110B-Chat-demo")
28
 
29
- def system_instructions(question_difficulty, topic, documents_str):
30
- return f"""<s> [INST] You are a great teacher and your task is to create 10 questions with 4 choices with {question_difficulty} difficulty about the topic request "{topic}" only from the below given documents, {documents_str}. Then create answers. Index in JSON format, the questions as "Q#":"" to "Q#":"", the four choices as "Q#:C1":"" to "Q#:C4":"", and the answers as "A#":"Q#:C#" to "A#":"Q#:C#". Example: 'A10':'Q10:C3' [/INST]"""
31
 
32
- # Ragatouille database for Colbert ie highly accurate mode
33
  RAG_db = gr.State()
34
  quiz_data = None
35
 
 
 
 
 
 
 
 
36
 
37
- #defining a function to convert json file to excel file
38
  def json_to_excel(output_json):
39
- # Initialize list for DataFrame
40
  data = []
41
- gr.Warning('Generating Shareable file link..', duration=30)
42
- for i in range(1, 11): # Assuming there are 10 questions
43
- question_key = f"Q{i}"
44
- answer_key = f"A{i}"
45
-
46
  question = output_json.get(question_key, '')
47
  correct_answer_key = output_json.get(answer_key, '')
48
- #correct_answer = correct_answer_key.split(':')[-1] if correct_answer_key else ''
49
  correct_answer = correct_answer_key.split(':')[-1].replace('C', '').strip() if correct_answer_key else ''
50
-
51
- # Extract options
52
- option_keys = [f"{question_key}:C{i}" for i in range(1, 6)]
53
- options = [output_json.get(key, '') for key in option_keys]
54
-
55
- # Add data row
56
- data.append([
57
- question, # Question Text
58
- "Multiple Choice", # Question Type
59
- options[0], # Option 1
60
- options[1], # Option 2
61
- options[2] if len(options) > 2 else '', # Option 3
62
- options[3] if len(options) > 3 else '', # Option 4
63
- options[4] if len(options) > 4 else '', # Option 5
64
- correct_answer, # Correct Answer
65
- 30, # Time in seconds
66
- '' # Image Link
67
- ])
68
-
69
- # Create DataFrame
70
- df = pd.DataFrame(data, columns=[
71
- "Question Text",
72
- "Question Type",
73
- "Option 1",
74
- "Option 2",
75
- "Option 3",
76
- "Option 4",
77
- "Option 5",
78
- "Correct Answer",
79
- "Time in seconds",
80
- "Image Link"
81
- ])
82
-
83
  temp_file = NamedTemporaryFile(delete=False, suffix=".xlsx")
84
  df.to_excel(temp_file.name, index=False)
85
  return temp_file.name
86
- # Define a colorful theme
87
- colorful_theme = gr.themes.Default(
88
- primary_hue="cyan", # Set a bright cyan as primary color
89
- secondary_hue="yellow", # Set a bright magenta as secondary color
90
- neutral_hue="purple" # Optionally set a neutral color
91
-
92
- )
93
 
94
- #gradio app creation for a user interface
95
- with gr.Blocks(title="Quiz Maker", theme=colorful_theme) as QUIZBOT:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
 
 
 
 
 
97
 
98
- # Create a single row for the HTML and Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  with gr.Row():
100
  with gr.Column(scale=2):
101
  gr.Image(value='logo.png', height=200, width=200)
102
  with gr.Column(scale=6):
103
  gr.HTML("""
104
  <center>
105
- <h1><span style="color: purple;">GOVERNMENT HIGH SCHOOL,SUTHUKENY</span> STUDENTS QUIZBOT</h1>
106
  <h2>Generative AI-powered Capacity building for STUDENTS</h2>
107
- <i>⚠️STUDENTS CAN CREATE QUIZ AND EVALUATE BY THEMSELVES ! ⚠️</i>
108
  </center>
109
  """)
110
-
111
-
112
-
113
 
114
- topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic/details from 10 social CBSE")
115
-
116
- with gr.Row():
117
- difficulty_radio = gr.Radio(["easy", "average", "hard"], label="How difficult should the quiz be?")
118
- model_radio = gr.Radio(choices=[ '(ACCURATE) BGE reranker', '(HIGH ACCURATE) ColBERT'],
119
- value='(ACCURATE) BGE reranker', label="Embeddings",
120
- info="First query to ColBERT may take a little time")
121
-
122
- generate_quiz_btn = gr.Button("Generate Quiz!🚀")
123
  quiz_msg = gr.Textbox()
124
-
125
- question_radios = [gr.Radio(visible=False) for _ in range(10)]
126
-
127
- @generate_quiz_btn.click(inputs=[difficulty_radio, topic, model_radio], outputs=[quiz_msg] + question_radios + [gr.File(label="Download Excel")])
128
- def generate_quiz(question_difficulty, topic, cross_encoder):
129
- top_k_rank = 10
130
- documents = []
131
- gr.Warning('Generating Quiz may take 1-2 minutes. Please wait.', duration=60)
132
-
133
- if cross_encoder == '(HIGH ACCURATE) ColBERT':
134
- gr.Warning('Retrieving using ColBERT.. First-time query will take 2 minute for model to load.. please wait',duration=100)
135
- RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
136
- RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
137
- documents_full = RAG_db.value.search(topic, k=top_k_rank)
138
- documents = [item['content'] for item in documents_full]
139
-
140
- else:
141
- document_start = perf_counter()
142
- query_vec = retriever.encode(topic)
143
- doc1 = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank)
144
-
145
- documents = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank).to_list()
146
- documents = [doc[TEXT_COLUMN_NAME] for doc in documents]
147
-
148
- query_doc_pair = [[topic, doc] for doc in documents]
149
-
150
- # if cross_encoder == '(FAST) MiniLM-L6v2':
151
- # cross_encoder1 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
152
- if cross_encoder == '(ACCURATE) BGE reranker':
153
- cross_encoder1 = CrossEncoder('BAAI/bge-reranker-base')
154
-
155
- cross_scores = cross_encoder1.predict(query_doc_pair)
156
- sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
157
- documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]
158
-
159
- #creating a text prompt to Qwen model combining the documents and system instruction
160
- formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))
161
- print(' Formatted Prompt : ' ,formatted_prompt)
162
- try:
163
- response = client.predict(query=formatted_prompt, history=[], system="You are a helpful assistant.", api_name="/model_chat")
164
- response1 = response[1][0][1]
165
-
166
- # Extract JSON
167
- start_index = response1.find('{')
168
- end_index = response1.rfind('}')
169
- cleaned_response = response1[start_index:end_index + 1] if start_index != -1 and end_index != -1 else ''
170
- print('Cleaned Response :',cleaned_response)
171
- output_json = json.loads(cleaned_response)
172
- # Assign the extracted JSON to quiz_data for use in the comparison function
173
- global quiz_data
174
- quiz_data = output_json
175
- # Generate the Excel file
176
- excel_file = json_to_excel(output_json)
177
-
178
-
179
- #Create a Quiz display in app
180
- question_radio_list = []
181
- for question_num in range(1, 11):
182
- question_key = f"Q{question_num}"
183
- answer_key = f"A{question_num}"
184
-
185
- question = output_json.get(question_key)
186
- answer = output_json.get(output_json.get(answer_key))
187
-
188
- if not question or not answer:
189
- continue
190
-
191
- choice_keys = [f"{question_key}:C{i}" for i in range(1, 5)]
192
- choice_list = [output_json.get(choice_key, "Choice not found") for choice_key in choice_keys]
193
-
194
- radio = gr.Radio(choices=choice_list, label=question, visible=True, interactive=True)
195
- question_radio_list.append(radio)
196
-
197
- return ['Quiz Generated!'] + question_radio_list + [excel_file]
198
-
199
- except json.JSONDecodeError as e:
200
- print(f"Failed to decode JSON: {e}")
201
-
202
  check_button = gr.Button("Check Score")
203
  score_textbox = gr.Markdown()
204
-
205
- @check_button.click(inputs=question_radios, outputs=score_textbox)
206
- def compare_answers(*user_answers):
207
- user_answer_list = list(user_answers)
208
- answers_list = []
209
-
210
- for question_num in range(1, 20):
211
- answer_key = f"A{question_num}"
212
- answer = quiz_data.get(quiz_data.get(answer_key))
213
- if not answer:
214
- break
215
- answers_list.append(answer)
216
-
217
- score = sum(1 for item in user_answer_list if item in answers_list)
218
-
219
- if score > 7:
220
- message = f"### Excellent! You got {score} out of 10!"
221
- elif score > 5:
222
- message = f"### Good! You got {score} out of 10!"
223
- else:
224
- message = f"### You got {score} out of 10! Don't worry. You can prepare well and try better next time!"
225
-
226
- return message
227
 
228
  QUIZBOT.queue()
229
  QUIZBOT.launch(debug=True)
230
-
 
 
1
  import pandas as pd
2
  import json
3
  import gradio as gr
 
8
  from sentence_transformers import CrossEncoder
9
  import numpy as np
10
  from time import perf_counter
11
+ import logging
12
 
13
+ # Setup logging
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
 
17
+ # Constants
18
  VECTOR_COLUMN_NAME = "vector"
19
  TEXT_COLUMN_NAME = "text"
20
+ QUIZ_QUESTIONS = 10
21
  proj_dir = Path.cwd()
 
 
 
 
 
 
 
22
  client = Client("Qwen/Qwen1.5-110B-Chat-demo")
23
 
24
+ # Import external retrieval functions
25
+ from backend.semantic_search import table, retriever
26
 
27
+ # RAG Database for ColBERT retrieval
28
  RAG_db = gr.State()
29
  quiz_data = None
30
 
31
def system_instructions(question_difficulty, topic, documents_str, num_questions=None):
    """Build the quiz-generation prompt that is sent to the chat model.

    The prompt asks the model to answer in JSON using this key scheme:
    ``"Q#"`` for the question text, ``"Q#:C1"`` .. ``"Q#:C4"`` for the four
    choices and ``"A#"`` holding the key of the correct choice
    (for example ``"A10": "Q10:C3"``).

    Args:
        question_difficulty: Difficulty label interpolated into the prompt.
        topic: Topic the questions must be about.
        documents_str: Source passages, already joined into a single string.
        num_questions: How many questions to request. When omitted, the
            module-level ``QUIZ_QUESTIONS`` constant is used, which keeps
            existing three-argument callers unchanged.

    Returns:
        The formatted prompt string.
    """
    if num_questions is None:
        # Resolved at call time so the default always tracks the constant.
        num_questions = QUIZ_QUESTIONS

    # Doubled braces in the example line are literal braces in the f-string.
    return f"""
    <s> [INST] You are a great teacher and your task is to create {num_questions} questions with 4 choices each,
    with {question_difficulty} difficulty about the topic "{topic}" only from the given documents:
    {documents_str}. Provide output in JSON format as follows:
    "Q#":"", "Q#:C1":"", "Q#:C2":"", "Q#:C3":"", "Q#:C4":"", "A#":"Q#:C#"
    Example: {{ "A10":"Q10:C3" }} [/INST]"""
38
 
 
def json_to_excel(output_json):
    """Write the generated quiz to a temporary ``.xlsx`` file and return its path.

    Args:
        output_json: Parsed quiz dictionary using the keys ``"Q#"`` (question),
            ``"Q#:C1"`` .. ``"Q#:C4"`` (choices) and ``"A#"`` (key of the
            correct choice, e.g. ``"Q3:C2"``). Missing keys become empty cells.

    Returns:
        Path of the spreadsheet. The file is created with ``delete=False``,
        so the caller owns it and it is not removed automatically.
    """
    rows = []
    for i in range(1, QUIZ_QUESTIONS + 1):
        question_key, answer_key = f"Q{i}", f"A{i}"
        question = output_json.get(question_key, '')

        # "Q3:C2" -> "2": keep only the choice number. str() guards against
        # the model returning a non-string value (e.g. a bare integer).
        correct_answer_key = str(output_json.get(answer_key) or '')
        correct_answer = correct_answer_key.split(':')[-1].replace('C', '').strip()

        options = [output_json.get(f"{question_key}:C{j}", '') for j in range(1, 5)]
        # 30 is the per-question time in seconds; the image link is left blank.
        rows.append([question, "Multiple Choice", *options, correct_answer, 30, ''])

    df = pd.DataFrame(
        rows,
        columns=[
            "Question Text",
            "Question Type",
            "Option 1",
            "Option 2",
            "Option 3",
            "Option 4",
            "Correct Answer",
            "Time in seconds",
            "Image Link",
        ],
    )

    # Only the unique path is needed: close our handle before pandas reopens
    # the file, otherwise the descriptor leaks (and the write fails on
    # platforms that forbid opening an already-open file a second time).
    with NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        excel_path = temp_file.name
    df.to_excel(excel_path, index=False)
    return excel_path
 
 
 
 
 
 
 
53
 
54
# Per-process cache of loaded models/indexes, so they are loaded once
# instead of on every quiz request.
_MODEL_CACHE = {}


def retrieve_documents(topic, cross_encoder):
    """Return up to ten source passages relevant to *topic*.

    Args:
        topic: Free-text query entered by the user.
        cross_encoder: Retrieval mode label from the UI. With
            ``'(HIGH ACCURATE) ColBERT'`` the ColBERT index is searched;
            any other value uses the vector table, and
            ``'(ACCURATE) BGE reranker'`` additionally reranks those hits.

    Returns:
        A list of passage strings, best match first (possibly empty).
    """
    top_k_rank = 10

    if cross_encoder == '(HIGH ACCURATE) ColBERT':
        index = _MODEL_CACHE.get('colbert_index')
        if index is None:
            RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
            index = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
            _MODEL_CACHE['colbert_index'] = index
        # Kept for compatibility with code that reads the index from RAG_db.
        RAG_db.value = index
        documents_full = index.search(topic, k=top_k_rank)
        return [item['content'] for item in documents_full]

    query_vec = retriever.encode(topic)
    doc_results = (
        table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME)
        .limit(top_k_rank)
        .to_list()
    )
    documents = [doc[TEXT_COLUMN_NAME] for doc in doc_results]

    # Skip reranking when the search returned nothing: there is nothing to
    # score, and the reranker should not be handed an empty batch.
    if documents and cross_encoder == '(ACCURATE) BGE reranker':
        model = _MODEL_CACHE.get('bge_reranker')
        if model is None:
            model = CrossEncoder('BAAI/bge-reranker-base')
            _MODEL_CACHE['bge_reranker'] = model
        scores = model.predict([[topic, doc] for doc in documents])
        # argsort is ascending; reverse it so the highest score comes first.
        documents = [documents[idx] for idx in np.argsort(scores)[::-1][:top_k_rank]]

    return documents
74
+
75
def _quiz_failure_outputs(message):
    """Return a complete set of handler outputs that reports *message* and hides the quiz."""
    hidden_radios = [gr.Radio(visible=False) for _ in range(QUIZ_QUESTIONS)]
    # One status message, one update per question slot, and no download file.
    return [message] + hidden_radios + [None]


def generate_quiz(question_difficulty, topic, cross_encoder):
    """Generate a quiz for *topic* and return the values for the UI outputs.

    Args:
        question_difficulty: Difficulty label chosen in the UI.
        topic: Topic text entered by the user.
        cross_encoder: Retrieval mode label passed on to ``retrieve_documents``.

    Returns:
        A list with ``1 + QUIZ_QUESTIONS + 1`` items: the status message, one
        ``gr.Radio`` update per question slot, and the path of the Excel
        export (``None`` on failure). The length is the same on success and
        on failure so it always matches the outputs wired to the button.

    Side effects:
        On success, stores the parsed quiz in the module-level ``quiz_data``
        so ``compare_answers`` can score against it.
    """
    global quiz_data

    documents = retrieve_documents(topic, cross_encoder)
    formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))

    try:
        # The reply text is read from position [1][0][1] of the API result.
        response = client.predict(
            query=formatted_prompt,
            history=[],
            system="You are a helpful assistant.",
            api_name="/model_chat",
        )[1][0][1]

        # The model may wrap the JSON in prose; keep the outermost {...} span.
        start, end = response.find('{'), response.rfind('}')
        if start == -1 or end < start:
            raise ValueError("model response contains no JSON object")
        output_json = json.loads(response[start:end + 1])
        if not isinstance(output_json, dict):
            raise ValueError("model response is not a JSON object")
    except (ValueError, IndexError, KeyError, TypeError, AttributeError) as e:
        # json.JSONDecodeError is a ValueError; the other types cover an
        # unexpectedly shaped API response.
        logger.error("Failed to parse quiz JSON from model response: %s", e)
        return _quiz_failure_outputs("Error generating quiz")

    quiz_data = output_json

    radios = []
    for i in range(1, QUIZ_QUESTIONS + 1):
        question = output_json.get(f"Q{i}")
        choices = [output_json.get(f"Q{i}:C{j}", "") for j in range(1, 5)]
        # Hide the slot when the model returned fewer questions than asked.
        radios.append(gr.Radio(choices=choices, label=question, visible=bool(question)))

    return ['Quiz Generated!'] + radios + [json_to_excel(output_json)]
88
+
89
def compare_answers(*user_answers):
    """Score the user's selections against the most recently generated quiz.

    Args:
        *user_answers: The selected choice text for each question, in question
            order; ``None`` for a question left unanswered.

    Returns:
        A Markdown string with the score, or a prompt to generate a quiz
        first when no quiz has been stored in ``quiz_data`` yet.
    """
    if not quiz_data:
        # "Check Score" pressed before any quiz was generated.
        return "### Please generate a quiz before checking your score."

    score = 0
    for number, answer in enumerate(user_answers, start=1):
        if answer is None:
            continue
        # "A#" holds the key of the correct choice (e.g. "Q3:C2"); the text
        # stored under that key is what the radio button reports back.
        correct_key = quiz_data.get(f"A{number}")
        if isinstance(correct_key, str) and answer == quiz_data.get(correct_key):
            score += 1

    if score > 7:
        verdict = 'Excellent!'
    elif score > 5:
        verdict = 'Good!'
    else:
        verdict = 'Keep Trying!'
    return f"### {verdict} You got {score} out of {QUIZ_QUESTIONS}!"
92
+
93
+ colorful_theme = gr.themes.Default(primary_hue="cyan", secondary_hue="yellow", neutral_hue="purple")
94
+
95
+ with gr.Blocks(title="Quiz Maker", theme=colorful_theme) as QUIZBOT:
96
  with gr.Row():
97
  with gr.Column(scale=2):
98
  gr.Image(value='logo.png', height=200, width=200)
99
  with gr.Column(scale=6):
100
  gr.HTML("""
101
  <center>
102
+ <h1><span style="color: purple;">GOVERNMENT HIGH SCHOOL, SUTHUKENY</span> STUDENTS QUIZBOT</h1>
103
  <h2>Generative AI-powered Capacity building for STUDENTS</h2>
104
+ <i>⚠️ STUDENTS CAN CREATE QUIZ AND EVALUATE BY THEMSELVES! ⚠️</i>
105
  </center>
106
  """)
 
 
 
107
 
108
+ topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic from Class 10 CBSE")
109
+ difficulty_radio = gr.Radio(["easy", "average", "hard"], label="Select Quiz Difficulty")
110
+ model_radio = gr.Radio(["(ACCURATE) BGE reranker", "(HIGH ACCURATE) ColBERT"], value="(ACCURATE) BGE reranker", label="Embeddings Model")
111
+
112
+ generate_quiz_btn = gr.Button("Generate Quiz! 🚀")
 
 
 
 
113
  quiz_msg = gr.Textbox()
114
+ question_radios = [gr.Radio(visible=False) for _ in range(QUIZ_QUESTIONS)]
115
+
116
+ generate_quiz_btn.click(inputs=[difficulty_radio, topic, model_radio], outputs=[quiz_msg] + question_radios + [gr.File(label="Download Excel")], fn=generate_quiz)
117
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  check_button = gr.Button("Check Score")
119
  score_textbox = gr.Markdown()
120
+ check_button.click(inputs=question_radios, outputs=score_textbox, fn=compare_answers)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
  QUIZBOT.queue()
123
  QUIZBOT.launch(debug=True)