import os
from groq import Groq
import gradio as gr
import pytesseract
from sentence_transformers import SentenceTransformer, util
from PIL import Image
from typing import List
import torch
from transformers import BertTokenizer, BertModel, T5ForConditionalGeneration, T5Tokenizer
import torch.nn.functional as F
# Load pre-trained models
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')
sentence_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# Load the pre-trained T5 model and tokenizer for grammar error detection
grammar_model = T5ForConditionalGeneration.from_pretrained('t5-base')
grammar_tokenizer = T5Tokenizer.from_pretrained('t5-base')
# Initialize Groq client
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# System prompt for Groq
system_prompt = {
    "role": "system",
    "content": "You are a helpful assistant. You reply with concise, accurate answers."
}
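# Stream a chat completion from Groq, yielding the accumulated text so the UI
# can render partial output as tokens arrive.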
async def chat_groq(message, history):
    messages = [system_prompt]
    for msg in history:
        messages.append({"role": "user", "content": str(msg[0])})
        messages.append({"role": "assistant", "content": str(msg[1])})
    messages.append({"role": "user", "content": str(message)})
    response_content = ''
    stream = client.chat.completions.create(
        model="llama3-70b-8192",
        messages=messages,
        max_tokens=1024,
        temperature=1.3,
        stream=True
    )
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            response_content += content
            yield response_content
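# OCR the uploaded answer sheet with Tesseract; multiple language packs are
# combined by joining their codes with '+', e.g. 'eng+fra'.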
def extract_text_from_image(filepath: str, languages: List[str]):
    image = Image.open(filepath)
    lang_str = '+'.join(languages)
    return pytesseract.image_to_string(image=image, lang=lang_str)
def assign_badge(grade):
    if grade == 5:
        return "Gold Badge 🥇"
    elif grade == 4:
        return "Silver Badge 🥈"
    elif grade == 3:
        return "Bronze Badge 🥉"
    else:
        return "Keep Improving Badge 💪"
def detailed_feedback(similarity_score):
    if similarity_score >= 0.9:
        return {"Clarity": "Excellent", "Completeness": "Complete", "Accuracy": "Accurate"}
    elif similarity_score >= 0.8:
        return {"Clarity": "Good", "Completeness": "Almost Complete", "Accuracy": "Mostly Accurate"}
    elif similarity_score >= 0.7:
        return {"Clarity": "Fair", "Completeness": "Partial", "Accuracy": "Some Errors"}
    else:
        return {"Clarity": "Needs Improvement", "Completeness": "Incomplete", "Accuracy": "Inaccurate"}
def get_grade(similarity_score):
    if similarity_score >= 0.9:
        return 5
    elif similarity_score >= 0.8:
        return 4
    elif similarity_score >= 0.7:
        return 3
    elif similarity_score >= 0.6:
        return 2
    else:
        return 1
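# Embed text with BERT by mean-pooling the final hidden states over all tokens,
# producing one fixed-size vector per input.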
def get_bert_embedding(text):
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = bert_model(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings
def calculate_cosine_similarity(embedding1, embedding2):
    similarity = F.cosine_similarity(embedding1, embedding2)
    return similarity.item()
def calculate_sentence_similarity(text1, text2):
    embedding1 = sentence_model.encode(text1, convert_to_tensor=True)
    embedding2 = sentence_model.encode(text2, convert_to_tensor=True)
    return util.pytorch_cos_sim(embedding1, embedding2).item()
# Grammar detection and penalization using the T5 model
def detect_grammar_errors(text):
    input_text = f"grammar: {text}"
    inputs = grammar_tokenizer.encode(input_text, return_tensors='pt', max_length=512, truncation=True)
    outputs = grammar_model.generate(inputs, max_length=512, num_beams=4, early_stopping=True)
    grammar_analysis = grammar_tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Rough heuristic: vanilla t5-base has no trained "grammar:" task, so counting
    # the word 'error' in its output is only a proxy. Substitute a grammar-tuned
    # checkpoint and stricter criteria for real use.
    error_count = grammar_analysis.count('error')
    return error_count
def penalize_for_grammar(student_answer):
    grammar_errors = detect_grammar_errors(student_answer)
    # Each detected error costs 5% of the score; the multiplier is floored at 0.5,
    # so the grammar penalty can never exceed 50%.
    penalty = max(0.5, 1 - 0.05 * grammar_errors)
    return penalty
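# Blend the two semantic signals and scale by the grammar multiplier:
# final = (0.75 * bert_cos + 0.25 * sbert_cos) * grammar_penalty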
def compare_answers(student_answer, teacher_answer):
    bert_similarity = calculate_cosine_similarity(get_bert_embedding(student_answer), get_bert_embedding(teacher_answer))
    sentence_similarity = calculate_sentence_similarity(student_answer, teacher_answer)
    # Use a higher weight for BERT similarity
    semantic_similarity = (0.75 * bert_similarity + 0.25 * sentence_similarity)
    # Apply grammar penalty
    grammar_penalty = penalize_for_grammar(student_answer)
    final_similarity = semantic_similarity * grammar_penalty
    return final_similarity
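# Naive keyword overlap: lowercase whitespace tokens, no stemming or stopword
# removal, so only exact word matches count.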
def extract_keywords(text):
    return set(text.lower().split())
def check_keywords(student_answer, model_answer):
    student_keywords = extract_keywords(student_answer)
    teacher_keywords = extract_keywords(model_answer)
    keyword_overlap = len(student_keywords.intersection(teacher_keywords))
    return keyword_overlap / (len(teacher_keywords) if len(teacher_keywords) > 0 else 1)
def evaluate_answer(image, languages, model_answer):
    student_answer = extract_text_from_image(image, languages)
    # Calculate semantic similarity
    semantic_similarity = compare_answers(student_answer, model_answer)
    # Calculate keyword similarity
    keyword_similarity = check_keywords(student_answer, model_answer)
    # Adjust the weight of keyword similarity
    combined_similarity = (0.9 * semantic_similarity + 0.1 * keyword_similarity)
    grade = get_grade(combined_similarity)
    feedback = f"Student's answer: {student_answer}\nTeacher's answer: {model_answer}"
    badge = assign_badge(grade)
    detailed_feedback_msg = detailed_feedback(combined_similarity)
    prompt = f"The student got grade: {grade} when the student's answer is: {student_answer} and the teacher's answer is: {model_answer}. Justify the grade given to the student."
    return grade, combined_similarity * 100, feedback, badge, detailed_feedback_msg, prompt
async def gradio_interface(image, languages: List[str], model_answer="The process of photosynthesis helps plants produce glucose using sunlight.", prompt="", history=None):
    history = history or []  # avoid sharing a mutable default across calls
    # The generated justification prompt replaces whatever was typed in the Prompt box
    grade, similarity_score, feedback, badge, detailed_feedback_msg, prompt = evaluate_answer(image, languages, model_answer)
    response = ""
    # Drain the stream; only the final accumulated response is returned
    async for result in chat_groq(prompt, history):
        response = result
    return grade, similarity_score, feedback, badge, detailed_feedback_msg, response
language_choices = pytesseract.get_languages()
interface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Image(type="filepath", label="Input"),
        gr.CheckboxGroup(language_choices, type="value", value=['eng'], label='Language'),
        gr.Textbox(lines=2, placeholder="Enter your model answer here", label="Model Answer"),
        gr.Textbox(lines=2, placeholder="Enter your prompt here", label="Prompt")
    ],
    outputs=[
        gr.Text(label="Grade"),
        gr.Number(label="Similarity Score (%)"),
        gr.Text(label="Feedback"),
        gr.Text(label="Badge"),
        gr.JSON(label="Detailed Feedback"),
        gr.Text(label="Generated Response")
    ],
    title="Enhanced Automated Grading System",
    description="Upload an image of your answer sheet to get a grade from 1 to 5, a similarity score, feedback, a badge, and detailed feedback based on the model answer.",
    live=True
)
if __name__ == "__main__":
    interface.queue()
    interface.launch()
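# A minimal sketch for exercising the grading pipeline without the UI
# (assumes a scanned answer at 'sample_answer.png'; the path is hypothetical):
#
#   grade, score, feedback, badge, details, prompt = evaluate_answer(
#       'sample_answer.png', ['eng'],
#       'The process of photosynthesis helps plants produce glucose using sunlight.'
#   )
#   print(grade, round(score, 1), badge)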