Update app.py
Browse files
app.py
CHANGED
|
@@ -1,13 +1,10 @@
|
|
| 1 |
import os
|
| 2 |
from groq import Groq
|
| 3 |
import gradio as gr
|
| 4 |
-
from transformers import pipeline
|
| 5 |
import pytesseract
|
| 6 |
from sentence_transformers import SentenceTransformer, util
|
| 7 |
from PIL import Image
|
| 8 |
from typing import List
|
| 9 |
-
import requests
|
| 10 |
-
|
| 11 |
import torch
|
| 12 |
from transformers import BertTokenizer, BertModel
|
| 13 |
import torch.nn.functional as F
|
|
@@ -15,57 +12,16 @@ import torch.nn.functional as F
|
|
| 15 |
# Load pre-trained BERT model and tokenizer
|
| 16 |
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
| 17 |
model = BertModel.from_pretrained('bert-base-uncased')
|
| 18 |
-
|
| 19 |
|
| 20 |
# Initialize Groq client
|
| 21 |
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
| 22 |
|
| 23 |
-
# System prompt for Groq
|
| 24 |
-
system_prompt = {
|
| 25 |
-
"role": "system",
|
| 26 |
-
"content": "You are a useful assistant. You reply with efficient answers."
|
| 27 |
-
}
|
| 28 |
-
|
| 29 |
-
# Function to interact with Groq for generating response
|
| 30 |
-
async def chat_groq(message, history):
    """Stream a Groq chat completion, yielding the reply accumulated so far.

    The request starts with ``system_prompt``, replays each ``history`` entry
    as a user/assistant pair (items 0 and 1 of the entry), and ends with
    ``message`` as the newest user turn.
    """
    messages = [system_prompt]
    for past_turn in history:
        messages.extend(
            (
                {"role": "user", "content": str(past_turn[0])},
                {"role": "assistant", "content": str(past_turn[1])},
            )
        )
    messages.append({"role": "user", "content": str(message)})

    stream = client.chat.completions.create(
        model="llama3-70b-8192",
        messages=messages,
        max_tokens=1024,
        temperature=1.3,
        stream=True,
    )

    response_content = ''
    # NOTE(review): indentation is not visible in this view; the yield is
    # assumed to run once per chunk (loop level), not only for non-empty
    # deltas -- confirm against the real file.
    for chunk in stream:
        delta_text = chunk.choices[0].delta.content
        if delta_text:
            response_content += delta_text
        yield response_content
|
| 54 |
-
|
| 55 |
-
# Extract text from an image using Tesseract
|
| 56 |
-
def extract_text_from_image(filepath: str, languages: List[str]):
    """Run Tesseract OCR on the image at ``filepath`` and return its text.

    Args:
        filepath: Path of the image file to read.
        languages: Tesseract language codes; they are joined with ``+`` into
            a single ``lang`` argument.  When the list is empty (or ``None``)
            ``lang=None`` is passed instead of an empty string.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    lang_str = '+'.join(languages) if languages else None
    # The context manager closes the underlying file handle once OCR is done.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=lang_str)
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
# Function to get BERT embeddings
|
| 64 |
def get_bert_embedding(text):
    """Return a mean-pooled BERT embedding for ``text``.

    The text is tokenized with the module-level ``tokenizer`` and run through
    the module-level ``model`` with gradient tracking disabled.  The last
    hidden state is averaged over the token axis, weighted by the attention
    mask so that padding positions (which appear when several texts of
    different lengths are passed together) do not dilute the average.  For a
    single string there is no padding, so the result equals a plain mean over
    all tokens.

    Args:
        text: The text to embed.

    Returns:
        The pooled embedding tensor (token axis reduced).
    """
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    hidden = outputs.last_hidden_state
    # Broadcast the mask over the hidden dimension: 1 = real token, 0 = padding.
    mask = inputs['attention_mask'].unsqueeze(-1).to(hidden.dtype)
    token_counts = mask.sum(dim=1).clamp(min=1)
    return (hidden * mask).sum(dim=1) / token_counts
|
| 71 |
|
|
@@ -74,65 +30,49 @@ def calculate_cosine_similarity(embedding1, embedding2):
|
|
| 74 |
similarity = F.cosine_similarity(embedding1, embedding2)
|
| 75 |
return similarity.item()
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
# Function to compare logic of student and teacher answers
|
| 78 |
def compare_answers(student_answer, teacher_answer):
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
#
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
return "Keep Improving Badge 💪"
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
# Categorize feedback into clarity, completeness, and accuracy
|
| 100 |
-
def detailed_feedback(similarity_score):
    """Map a similarity score to Clarity/Completeness/Accuracy labels.

    Tiers are tried from highest to lowest (>= 0.9, >= 0.8, >= 0.7); any
    score that reaches none of them gets the lowest set of labels.
    """
    tiers = (
        (0.9, ("Excellent", "Complete", "Accurate")),
        (0.8, ("Good", "Almost Complete", "Mostly Accurate")),
        (0.7, ("Fair", "Partial", "Some Errors")),
    )
    labels = ("Needs Improvement", "Incomplete", "Inaccurate")
    for floor, tier_labels in tiers:
        if similarity_score >= floor:
            labels = tier_labels
            break
    return dict(zip(("Clarity", "Completeness", "Accuracy"), labels))
|
| 109 |
-
|
| 110 |
-
# Assign grades based on similarity score
|
| 111 |
-
def get_grade(similarity_score):
    """Convert a similarity score into an integer grade from 1 to 5.

    The first threshold the score reaches (0.9, 0.8, 0.7, 0.6, checked in
    that order) decides the grade; anything lower is graded 1.
    """
    for grade, floor in ((5, 0.9), (4, 0.8), (3, 0.7), (2, 0.6)):
        if similarity_score >= floor:
            return grade
    return 1
|
| 122 |
-
|
| 123 |
-
|
| 124 |
|
| 125 |
# Function to evaluate student's answer by comparing it to a model answer
|
| 126 |
def evaluate_answer(image, languages, model_answer):
|
| 127 |
student_answer = extract_text_from_image(image, languages)
|
| 128 |
-
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
feedback = f"Student's answer: {student_answer}\nTeacher's answer: {model_answer}"
|
| 131 |
-
# visual_feedback = generate_sequence_feedback(student_answer, model_answer)
|
| 132 |
badge = assign_badge(grade)
|
| 133 |
-
detailed_feedback_msg = detailed_feedback(
|
| 134 |
prompt = f"The student got grade: {grade} when the student's answer is: {student_answer} and the teacher's answer is: {model_answer}. Justify the grade given to the student."
|
| 135 |
-
return grade,
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
# Main interface function for Gradio
|
| 138 |
async def gradio_interface(image, languages: List[str], model_answer="The process of photosynthesis helps plants produce glucose using sunlight.", prompt="", history=[]):
|
|
|
|
| 1 |
import os
|
| 2 |
from groq import Groq
|
| 3 |
import gradio as gr
|
|
|
|
| 4 |
import pytesseract
|
| 5 |
from sentence_transformers import SentenceTransformer, util
|
| 6 |
from PIL import Image
|
| 7 |
from typing import List
|
|
|
|
|
|
|
| 8 |
import torch
|
| 9 |
from transformers import BertTokenizer, BertModel
|
| 10 |
import torch.nn.functional as F
|
|
|
|
| 12 |
# Load pre-trained BERT model and tokenizer
|
| 13 |
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
| 14 |
model = BertModel.from_pretrained('bert-base-uncased')
|
| 15 |
+
sentence_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
| 16 |
|
| 17 |
# Initialize Groq client
|
| 18 |
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# Function to get BERT embeddings
|
| 21 |
def get_bert_embedding(text):
    """Return a mean-pooled BERT embedding for ``text``.

    The text is tokenized with the module-level ``tokenizer`` and run through
    the module-level ``model`` with gradient tracking disabled.  The last
    hidden state is averaged over the token axis, weighted by the attention
    mask so that padding positions (which appear when several texts of
    different lengths are passed together) do not dilute the average.  For a
    single string there is no padding, so the result equals a plain mean over
    all tokens.

    Args:
        text: The text to embed.

    Returns:
        The pooled embedding tensor (token axis reduced).
    """
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    hidden = outputs.last_hidden_state
    # Broadcast the mask over the hidden dimension: 1 = real token, 0 = padding.
    mask = inputs['attention_mask'].unsqueeze(-1).to(hidden.dtype)
    token_counts = mask.sum(dim=1).clamp(min=1)
    return (hidden * mask).sum(dim=1) / token_counts
|
| 27 |
|
|
|
|
| 30 |
similarity = F.cosine_similarity(embedding1, embedding2)
|
| 31 |
return similarity.item()
|
| 32 |
|
| 33 |
+
# Function to calculate sentence embedding similarity
|
| 34 |
+
def calculate_sentence_similarity(text1, text2):
    """Return the cosine similarity of two texts as a Python float.

    Each text is encoded separately with the module-level ``sentence_model``
    and the resulting embeddings are compared with ``util.pytorch_cos_sim``.
    """
    first_vec, second_vec = (
        sentence_model.encode(sample, convert_to_tensor=True)
        for sample in (text1, text2)
    )
    score = util.pytorch_cos_sim(first_vec, second_vec)
    return score.item()
|
| 38 |
+
|
| 39 |
# Function to compare logic of student and teacher answers
|
| 40 |
def compare_answers(student_answer, teacher_answer, bert_weight=0.5):
    """Blend BERT and sentence-embedding similarity into one score.

    Args:
        student_answer: Text of the student's answer.
        teacher_answer: Text of the reference answer.
        bert_weight: Share of the final score taken from the BERT similarity;
            the sentence-embedding similarity receives ``1 - bert_weight``.
            Defaults to 0.5, an equal blend.

    Returns:
        The weighted average of the two similarity scores.

    Raises:
        ValueError: If ``bert_weight`` is outside ``[0, 1]``.
    """
    if not 0.0 <= bert_weight <= 1.0:
        raise ValueError(f"bert_weight must be between 0 and 1, got {bert_weight!r}")
    bert_similarity = calculate_cosine_similarity(
        get_bert_embedding(student_answer),
        get_bert_embedding(teacher_answer),
    )
    sentence_similarity = calculate_sentence_similarity(student_answer, teacher_answer)
    return bert_weight * bert_similarity + (1.0 - bert_weight) * sentence_similarity
|
| 46 |
+
|
| 47 |
+
# Function to extract keywords from the model answer (simple keyword extraction)
|
| 48 |
+
def extract_keywords(text):
    """Return the set of distinct lower-cased words in ``text``.

    Words are split on whitespace and stripped of leading and trailing
    punctuation, so "sunlight." and "sunlight" are the same keyword.  Tokens
    made up only of punctuation are dropped.  No counting is performed; the
    result only records which words occur.
    """
    import string  # local import so the block does not rely on file-level imports

    stripped = (token.strip(string.punctuation) for token in text.lower().split())
    return {token for token in stripped if token}
|
| 51 |
+
|
| 52 |
+
# Adjust grading based on key terms present in student answer
|
| 53 |
+
def check_keywords(student_answer, model_answer):
    """Return the fraction of model-answer keywords found in the student answer.

    Both texts are reduced to keyword sets with ``extract_keywords``.  When
    the model answer yields no keywords the ratio is undefined, so 0.0 is
    returned instead of raising ``ZeroDivisionError``.
    """
    teacher_keywords = extract_keywords(model_answer)
    if not teacher_keywords:
        return 0.0
    student_keywords = extract_keywords(student_answer)
    matched = student_keywords & teacher_keywords
    return len(matched) / len(teacher_keywords)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
# Function to evaluate student's answer by comparing it to a model answer
|
| 60 |
def evaluate_answer(image, languages, model_answer):
    """Grade an OCR'd student answer against ``model_answer``.

    Text is read from ``image`` via ``extract_text_from_image``, scored with
    ``compare_answers`` and ``check_keywords``, and the mean of those two
    scores is passed to ``get_grade`` and ``detailed_feedback``.

    Returns:
        A 6-tuple ``(grade, score_percent, feedback, badge,
        detailed_feedback_msg, prompt)`` where ``score_percent`` is the
        combined similarity multiplied by 100.
    """
    student_answer = extract_text_from_image(image, languages)

    # Average the semantic score with the keyword-overlap score.
    combined_similarity = (
        compare_answers(student_answer, model_answer)
        + check_keywords(student_answer, model_answer)
    ) / 2

    grade = get_grade(combined_similarity)
    badge = assign_badge(grade)
    detailed_feedback_msg = detailed_feedback(combined_similarity)

    feedback = f"Student's answer: {student_answer}\nTeacher's answer: {model_answer}"
    prompt = f"The student got grade: {grade} when the student's answer is: {student_answer} and the teacher's answer is: {model_answer}. Justify the grade given to the student."
    return (
        grade,
        combined_similarity * 100,
        feedback,
        badge,
        detailed_feedback_msg,
        prompt,
    )
|
| 73 |
+
|
| 74 |
+
# The rest of your existing code...
|
| 75 |
+
|
| 76 |
|
| 77 |
# Main interface function for Gradio
|
| 78 |
async def gradio_interface(image, languages: List[str], model_answer="The process of photosynthesis helps plants produce glucose using sunlight.", prompt="", history=[]):
|