File size: 8,846 Bytes
7337af2
 
 
 
 
 
 
5b43d30
c1fb255
5b43d30
 
0b8902f
5b43d30
0b8902f
7450487
7337af2
c1fb255
 
 
 
7337af2
 
 
c188244
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b8902f
c188244
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b43d30
 
 
0b8902f
5b43d30
 
 
 
 
 
 
56dbb11
 
 
 
 
c1fb255
 
 
 
 
 
 
dd5270c
 
 
 
 
 
 
 
 
 
 
 
 
c1fb255
 
f0ed948
5f34cbc
f0ed948
5f34cbc
3cb5eb1
 
f0ed948
5b43d30
56dbb11
75bf678
 
56dbb11
75bf678
3cb5eb1
48c1789
ef7ef20
75bf678
3cb5eb1
 
c1fb255
5f34cbc
2799f0d
75bf678
3cb5eb1
56dbb11
 
 
 
 
 
 
 
 
75bf678
 
 
02e1cbf
7337af2
 
75bf678
81e1762
0b8902f
56dbb11
81e1762
0b8902f
56dbb11
 
0b8902f
 
75bf678
 
56dbb11
7337af2
 
56dbb11
75bf678
7337af2
75bf678
56dbb11
 
c3e8f96
9b5e1d8
7337af2
 
0b8902f
9b5e1d8
7337af2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import os
from groq import Groq
import gradio as gr 
import pytesseract
from sentence_transformers import SentenceTransformer, util
from PIL import Image
from typing import List
import torch
from transformers import BertTokenizer, BertModel, T5ForConditionalGeneration, T5Tokenizer
import torch.nn.functional as F

# Load pre-trained models
# NOTE: everything below runs at import time — first launch downloads the
# checkpoints and can be slow / network-dependent.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')
sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Load the pre-trained T5 model and tokenizer for grammar error detection
# NOTE(review): vanilla 't5-base' has no dedicated "grammar:" task prefix;
# the quality of its corrections is unverified — consider a fine-tuned
# grammar-correction checkpoint.
grammar_model = T5ForConditionalGeneration.from_pretrained('t5-base')
grammar_tokenizer = T5Tokenizer.from_pretrained('t5-base')

# Initialize Groq client
# Reads the API key from the GROQ_API_KEY environment variable (None if unset).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# System prompt for Groq — prepended as the first message of every
# chat_groq conversation.
system_prompt = {
    "role": "system",
    "content": "You are a useful assistant. You reply with efficient answers."
}

async def chat_groq(message, history):
    """Stream a Groq chat completion for *message*, yielding the growing reply.

    Args:
        message: The latest user message.
        history: Sequence of (user_text, assistant_text) pairs from prior turns.

    Yields:
        The accumulated assistant response text after each streamed chunk.
    """
    messages = [system_prompt]
    for msg in history:
        messages.append({"role": "user", "content": str(msg[0])})
        messages.append({"role": "assistant", "content": str(msg[1])})
    messages.append({"role": "user", "content": str(message)})

    response_content = ''

    stream = client.chat.completions.create(
        model="llama3-70b-8192",
        messages=messages,
        max_tokens=1024,
        temperature=1.3,
        stream=True
    )

    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            # Reuse the already-extracted delta text instead of re-reading
            # chunk.choices[0].delta.content a second time.
            response_content += content
        yield response_content

def extract_text_from_image(filepath: str, languages: List[str]):
    """Run Tesseract OCR on the image at *filepath* for the given languages.

    Args:
        filepath: Path to the answer-sheet image.
        languages: Tesseract language codes, e.g. ['eng', 'hin'].

    Returns:
        The extracted text as a string.
    """
    # Tesseract takes multiple languages joined with '+', e.g. "eng+hin".
    lang_str = '+'.join(languages)
    # Context manager closes the underlying file handle (the original leaked it:
    # PIL's Image.open is lazy and keeps the file open until explicitly closed).
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=lang_str)

def assign_badge(grade):
    """Map a 1-5 grade to a motivational badge label."""
    badge_by_grade = {
        5: "Gold Badge 🌟",
        4: "Silver Badge πŸ₯ˆ",
        3: "Bronze Badge πŸ₯‰",
    }
    # Grades of 2 and below (and any unexpected value) get the fallback badge.
    return badge_by_grade.get(grade, "Keep Improving Badge πŸ’ͺ")

def detailed_feedback(similarity_score):
    """Translate a similarity score into a qualitative rubric dict."""
    # Ordered highest-threshold-first; first match wins.
    rubric = (
        (0.9, {"Clarity": "Excellent", "Completeness": "Complete", "Accuracy": "Accurate"}),
        (0.8, {"Clarity": "Good", "Completeness": "Almost Complete", "Accuracy": "Mostly Accurate"}),
        (0.7, {"Clarity": "Fair", "Completeness": "Partial", "Accuracy": "Some Errors"}),
    )
    for threshold, verdict in rubric:
        if similarity_score >= threshold:
            return verdict
    return {"Clarity": "Needs Improvement", "Completeness": "Incomplete", "Accuracy": "Inaccurate"}

def get_grade(similarity_score):
    """Convert a similarity score into an integer grade from 1 to 5."""
    # Highest cutoff first; anything below 0.6 earns the minimum grade.
    cutoffs = ((0.9, 5), (0.8, 4), (0.7, 3), (0.6, 2))
    for cutoff, grade in cutoffs:
        if similarity_score >= cutoff:
            return grade
    return 1

def get_bert_embedding(text):
    """Embed *text* as the mean of BERT's last-hidden-state token vectors."""
    encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    # Inference only — skip autograd bookkeeping.
    with torch.no_grad():
        model_output = bert_model(**encoded)
    # Mean-pool across the token dimension: one vector per input sequence.
    return model_output.last_hidden_state.mean(dim=1)

def calculate_cosine_similarity(embedding1, embedding2):
    """Return the cosine similarity of two embeddings as a Python float."""
    # F.cosine_similarity yields a 1-element tensor for (1, dim) inputs;
    # .item() unwraps it to a scalar.
    return F.cosine_similarity(embedding1, embedding2).item()

def calculate_sentence_similarity(text1, text2):
    """Cosine similarity of two texts under the SentenceTransformer model."""
    vec_a = sentence_model.encode(text1, convert_to_tensor=True)
    vec_b = sentence_model.encode(text2, convert_to_tensor=True)
    score = util.pytorch_cos_sim(vec_a, vec_b)
    return score.item()

# Grammar detection and penalization using T5 model
def detect_grammar_errors(text):
    """Estimate the number of grammar errors in *text* via a T5 rewrite.

    Returns:
        (error_count, corrected_sentence). error_count is a rough heuristic —
        the absolute word-count difference between the original text and the
        model's rewrite — not a true per-error tally.
    """
    encoded = grammar_tokenizer.encode(f"grammar: {text}", return_tensors='pt', max_length=512, truncation=True)
    generated = grammar_model.generate(encoded, max_length=512, num_beams=4, early_stopping=True)
    corrected_sentence = grammar_tokenizer.decode(generated[0], skip_special_tokens=True)

    # Heuristic error count: the difference in word counts between the
    # original and the corrected rewrite. Edit distance would be a more
    # faithful measure but this keeps the scoring cheap.
    error_count = abs(len(text.split()) - len(corrected_sentence.split()))

    return error_count, corrected_sentence


def penalize_for_grammar(student_answer):
    """Compute a similarity penalty in [0, 0.7] from detected grammar errors.

    Each estimated error costs 0.25, capped at a 70% maximum penalty. The
    caller (compare_answers) subtracts this value from the semantic
    similarity score.

    Args:
        student_answer: The OCR-extracted student answer text.

    Returns:
        A float penalty: 0.0 for error-free text, up to 0.7.
    """
    grammar_errors, _ = detect_grammar_errors(student_answer)  # Only take the first element (error count)
    print(f"errors given by grammar: {grammar_errors}")
    # BUG FIX: the previous code returned max(0, 1 - 0.25 * errors) — a
    # *retention* factor equal to 1.0 for flawless text — while the caller
    # subtracts the return value, so perfect grammar was penalized by a full
    # 1.0 point. Return the actual penalty amount instead, capped at 0.7 as
    # the original comment ("max 70% penalty") always intended.
    penalty = min(0.7, 0.25 * grammar_errors)
    return penalty


def compare_answers(student_answer, teacher_answer):
    """Score the student answer against the teacher answer.

    Blends BERT mean-pooled embedding similarity with SentenceTransformer
    similarity (50/50 weighting), then subtracts the grammar penalty.
    """
    bert_similarity = calculate_cosine_similarity(
        get_bert_embedding(student_answer),
        get_bert_embedding(teacher_answer),
    )
    print(f"BERT similarity: {bert_similarity}")

    sentence_similarity = calculate_sentence_similarity(student_answer, teacher_answer)
    print(f"Sentence similarity: {sentence_similarity}")

    # Equal weighting of the two semantic signals.
    semantic_similarity = 0.50 * bert_similarity + 0.50 * sentence_similarity
    print(f"Semantic similarity: {semantic_similarity}")

    # Grammar quality reduces the final score.
    grammar_penalty = penalize_for_grammar(student_answer)
    final_similarity = semantic_similarity - grammar_penalty
    print(f"grammar penalty: {grammar_penalty}")
    print(f"Final similarity (after grammar penalty): {final_similarity}")

    return final_similarity

def extract_keywords(text):
    """Return the set of whitespace-delimited, lower-cased tokens in *text*."""
    tokens = text.lower().split()
    return set(tokens)

def check_keywords(student_answer, model_answer):
    """Fraction of the teacher's keywords present in the student's answer."""
    student_words = extract_keywords(student_answer)
    teacher_words = extract_keywords(model_answer)
    shared = student_words & teacher_words
    # Guard the empty-teacher-answer case to avoid ZeroDivisionError.
    keyword_similarity = len(shared) / (len(teacher_words) or 1)
    print(f"Keyword similarity: {keyword_similarity}")
    return keyword_similarity

def evaluate_answer(image, languages, model_answer):
    """OCR the student's answer sheet and grade it against *model_answer*.

    Returns:
        (grade, percentage score, feedback text, badge label,
        detailed feedback dict, LLM justification prompt)
    """
    student_answer = extract_text_from_image(image, languages)
    print(f"Extracted student answer: {student_answer}")

    # Semantic similarity (embeddings + grammar penalty) dominates the score;
    # raw keyword overlap contributes a small 10% share.
    semantic_similarity = compare_answers(student_answer, model_answer)
    keyword_similarity = check_keywords(student_answer, model_answer)
    combined_similarity = 0.9 * semantic_similarity + 0.1 * keyword_similarity
    print(f"Combined similarity score: {combined_similarity}")

    grade = get_grade(combined_similarity)
    badge = assign_badge(grade)
    detailed_feedback_msg = detailed_feedback(combined_similarity)
    feedback = f"Student's answer: {student_answer}\nTeacher's answer: {model_answer}"

    # Prompt handed to the LLM so it can justify the assigned grade.
    prompt = f"The student got grade: {grade} when the student's answer is: {student_answer} and the teacher's answer is: {model_answer}. Justify the grade given to the student."

    return grade, combined_similarity * 100, feedback, badge, detailed_feedback_msg, prompt

async def gradio_interface(image, languages: List[str], model_answer="The process of photosynthesis helps plants produce glucose using sunlight.", prompt="", history=None):
    """Gradio handler: grade the uploaded answer sheet and ask Groq to justify it.

    Args:
        image: Filepath of the uploaded answer-sheet image.
        languages: OCR language codes selected in the UI.
        model_answer: Reference answer to grade against.
        prompt: Placeholder from the UI; the real LLM prompt is built by
            evaluate_answer and overwrites this value.
        history: Optional prior (user, assistant) chat turns for Groq.

    Returns:
        (grade, similarity %, feedback, badge, detailed feedback, LLM response)
    """
    # BUG FIX: the default was a mutable `history=[]`, shared across all
    # calls; use None and create a fresh list per invocation instead.
    if history is None:
        history = []
    grade, similarity_score, feedback, badge, detailed_feedback_msg, prompt = evaluate_answer(image, languages, model_answer)
    # Drain the stream, keeping only the final accumulated response text.
    response = ""
    async for result in chat_groq(prompt, history):
        response = result
    return grade, similarity_score, feedback, badge, detailed_feedback_msg, response

# All languages the local Tesseract install supports, offered in the UI.
language_choices = pytesseract.get_languages()

# Gradio UI wiring: the four inputs map positionally onto gradio_interface's
# (image, languages, model_answer, prompt) parameters; history keeps its default.
interface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Image(type="filepath", label="Input"), 
        gr.CheckboxGroup(language_choices, type="value", value=['eng'], label='Language'),
        gr.Textbox(lines=2, placeholder="Enter your model answer here", label="Model Answer"),
        gr.Textbox(lines=2, placeholder="Enter your prompt here", label="Prompt")
    ],
    outputs=[
        gr.Text(label="Grade"), 
        gr.Number(label="Similarity Score (%)"), 
        gr.Text(label="Feedback"), 
        gr.Text(label="Badge"), 
        gr.JSON(label="Detailed Feedback"), 
        gr.Text(label="Generated Response")
    ],
    title="Enhanced Automated Grading System",
    description="Upload an image of your answer sheet to get a grade from 1 to 5, similarity score, visual feedback, badge, and detailed feedback based on the model answer.",
    live=True
)

if __name__ == "__main__":
    # Enable request queuing (required for async/streaming handlers), then serve.
    interface.queue()
    interface.launch()