File size: 7,493 Bytes
7337af2 5b43d30 56dbb11 7337af2 c188244 5b43d30 56dbb11 5b43d30 56dbb11 02e1cbf 7337af2 56dbb11 7337af2 56dbb11 7337af2 56dbb11 7337af2 c3e8f96 9b5e1d8 7337af2 9b5e1d8 7337af2 9b5e1d8 7337af2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
import os
from groq import Groq
import gradio as gr
import pytesseract
from sentence_transformers import SentenceTransformer, util
from PIL import Image
from typing import List
import torch
from transformers import BertTokenizer, BertModel
import torch.nn.functional as F
# Load pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')
sentence_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# Initialize Groq client
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# System prompt for Groq
system_prompt = {
"role": "system",
"content": "You are a useful assistant. You reply with efficient answers."
}
# Function to interact with Groq for generating response
async def chat_groq(message, history):
messages = [system_prompt]
for msg in history:
messages.append({"role": "user", "content": str(msg[0])})
messages.append({"role": "assistant", "content": str(msg[1])})
messages.append({"role": "user", "content": str(message)})
response_content = ''
stream = client.chat.completions.create(
model="llama3-70b-8192",
messages=messages,
max_tokens=1024,
temperature=1.3,
stream=True
)
for chunk in stream:
content = chunk.choices[0].delta.content
if content:
response_content += chunk.choices[0].delta.content
yield response_content
# Extract text from an image using Tesseract
def extract_text_from_image(filepath: str, languages: List[str]):
image = Image.open(filepath)
lang_str = '+'.join(languages) # Join languages for Tesseract
return pytesseract.image_to_string(image=image, lang=lang_str)
# Assign badges based on the grade
def assign_badge(grade):
if grade == 5:
return "Gold Badge π"
elif grade == 4:
return "Silver Badge π₯"
elif grade == 3:
return "Bronze Badge π₯"
else:
return "Keep Improving Badge πͺ"
# Categorize feedback into clarity, completeness, and accuracy
def detailed_feedback(similarity_score):
if similarity_score >= 0.9:
return {"Clarity": "Excellent", "Completeness": "Complete", "Accuracy": "Accurate"}
elif similarity_score >= 0.8:
return {"Clarity": "Good", "Completeness": "Almost Complete", "Accuracy": "Mostly Accurate"}
elif similarity_score >= 0.7:
return {"Clarity": "Fair", "Completeness": "Partial", "Accuracy": "Some Errors"}
else:
return {"Clarity": "Needs Improvement", "Completeness": "Incomplete", "Accuracy": "Inaccurate"}
# Assign grades based on similarity score
def get_grade(similarity_score):
if similarity_score >= 0.9:
return 5
elif similarity_score >= 0.8:
return 4
elif similarity_score >= 0.7:
return 3
elif similarity_score >= 0.6:
return 2
else:
return 1
# Function to get BERT embeddings
def get_bert_embedding(text):
inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
with torch.no_grad():
outputs = model(**inputs)
embeddings = outputs.last_hidden_state.mean(dim=1)
return embeddings
# Function to calculate cosine similarity
def calculate_cosine_similarity(embedding1, embedding2):
similarity = F.cosine_similarity(embedding1, embedding2)
return similarity.item()
# Function to calculate sentence embedding similarity
def calculate_sentence_similarity(text1, text2):
embedding1 = sentence_model.encode(text1, convert_to_tensor=True)
embedding2 = sentence_model.encode(text2, convert_to_tensor=True)
return util.pytorch_cos_sim(embedding1, embedding2).item()
# Function to compare logic of student and teacher answers
def compare_answers(student_answer, teacher_answer):
bert_similarity = calculate_cosine_similarity(get_bert_embedding(student_answer), get_bert_embedding(teacher_answer))
sentence_similarity = calculate_sentence_similarity(student_answer, teacher_answer)
# Combine scores with weights to emphasize one method over another if needed
final_similarity = (0.5 * bert_similarity + 0.5 * sentence_similarity)
return final_similarity
# Function to extract keywords from the model answer (simple keyword extraction)
def extract_keywords(text):
# Split text into words and count them
return set(text.lower().split())
# Adjust grading based on key terms present in student answer
def check_keywords(student_answer, model_answer):
student_keywords = extract_keywords(student_answer)
teacher_keywords = extract_keywords(model_answer)
keyword_overlap = len(student_keywords.intersection(teacher_keywords))
return keyword_overlap / len(teacher_keywords) # Ratio of matching keywords
# Function to evaluate student's answer by comparing it to a model answer
def evaluate_answer(image, languages, model_answer):
student_answer = extract_text_from_image(image, languages)
semantic_similarity = compare_answers(student_answer, model_answer)
keyword_similarity = check_keywords(student_answer, model_answer)
# Combine semantic similarity with keyword presence check
combined_similarity = (semantic_similarity + keyword_similarity) / 2
grade = get_grade(combined_similarity)
feedback = f"Student's answer: {student_answer}\nTeacher's answer: {model_answer}"
badge = assign_badge(grade)
detailed_feedback_msg = detailed_feedback(combined_similarity)
prompt = f"The student got grade: {grade} when the student's answer is: {student_answer} and the teacher's answer is: {model_answer}. Justify the grade given to the student."
return grade, combined_similarity * 100, feedback, badge, detailed_feedback_msg, prompt
# The rest of your existing code...
# Main interface function for Gradio
async def gradio_interface(image, languages: List[str], model_answer="The process of photosynthesis helps plants produce glucose using sunlight.", prompt="", history=[]):
grade, similarity_score, feedback, badge, detailed_feedback_msg, prompt = evaluate_answer(image, languages, model_answer)
response = ""
async for result in chat_groq(prompt, history):
response = result # Get the Groq response
return grade, similarity_score, feedback, badge, detailed_feedback_msg, response
# Get available Tesseract languages
language_choices = pytesseract.get_languages()
# Define Gradio interface
interface = gr.Interface(
fn=gradio_interface,
inputs=[
gr.Image(type="filepath", label="Input"),
gr.CheckboxGroup(language_choices, type="value", value=['eng'], label='Language'),
gr.Textbox(lines=2, placeholder="Enter your model answer here", label="Model Answer"),
gr.Textbox(lines=2, placeholder="Enter your prompt here", label="Prompt")
],
outputs=[
gr.Text(label="Grade"),
gr.Number(label="Similarity Score (%)"),
gr.Text(label="Feedback"),
# gr.HTML(label="Visual Feedback"),
gr.Text(label="Badge"),
gr.JSON(label="Detailed Feedback"),
gr.Text(label="Generated Response")
],
title="Enhanced Automated Grading System",
description="Upload an image of your answer sheet to get a grade from 1 to 5, similarity score, visual feedback, badge, and detailed feedback based on the model answer.",
live=True
)
if __name__ == "__main__":
interface.queue()
interface.launch()
|