Garvitj committed
Commit 56dbb11 · verified · 1 Parent(s): 5b43d30

Update app.py

Files changed (1)
  1. app.py +35 -95
app.py CHANGED
@@ -1,13 +1,10 @@
 import os
 from groq import Groq
 import gradio as gr
-from transformers import pipeline
 import pytesseract
 from sentence_transformers import SentenceTransformer, util
 from PIL import Image
 from typing import List
-import requests
-
 import torch
 from transformers import BertTokenizer, BertModel
 import torch.nn.functional as F
@@ -15,57 +12,16 @@ import torch.nn.functional as F
 # Load pre-trained BERT model and tokenizer
 tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
 model = BertModel.from_pretrained('bert-base-uncased')
-
+sentence_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
 
 # Initialize Groq client
 client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
-# System prompt for Groq
-system_prompt = {
-    "role": "system",
-    "content": "You are a useful assistant. You reply with efficient answers."
-}
-
-# Function to interact with Groq for generating response
-async def chat_groq(message, history):
-    messages = [system_prompt]
-
-    for msg in history:
-        messages.append({"role": "user", "content": str(msg[0])})
-        messages.append({"role": "assistant", "content": str(msg[1])})
-
-    messages.append({"role": "user", "content": str(message)})
-
-    response_content = ''
-
-    stream = client.chat.completions.create(
-        model="llama3-70b-8192",
-        messages=messages,
-        max_tokens=1024,
-        temperature=1.3,
-        stream=True
-    )
-
-    for chunk in stream:
-        content = chunk.choices[0].delta.content
-        if content:
-            response_content += chunk.choices[0].delta.content
-            yield response_content
-
-# Extract text from an image using Tesseract
-def extract_text_from_image(filepath: str, languages: List[str]):
-    image = Image.open(filepath)
-    lang_str = '+'.join(languages)  # Join languages for Tesseract
-    return pytesseract.image_to_string(image=image, lang=lang_str)
-
-
-
 # Function to get BERT embeddings
 def get_bert_embedding(text):
     inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
     with torch.no_grad():
         outputs = model(**inputs)
-    # Get the embeddings from the last hidden state
     embeddings = outputs.last_hidden_state.mean(dim=1)
     return embeddings
 
@@ -74,65 +30,49 @@ def calculate_cosine_similarity(embedding1, embedding2):
     similarity = F.cosine_similarity(embedding1, embedding2)
     return similarity.item()
 
+# Function to calculate sentence embedding similarity
+def calculate_sentence_similarity(text1, text2):
+    embedding1 = sentence_model.encode(text1, convert_to_tensor=True)
+    embedding2 = sentence_model.encode(text2, convert_to_tensor=True)
+    return util.pytorch_cos_sim(embedding1, embedding2).item()
+
 # Function to compare logic of student and teacher answers
 def compare_answers(student_answer, teacher_answer):
-    student_embedding = get_bert_embedding(student_answer)
-    teacher_embedding = get_bert_embedding(teacher_answer)
-    similarity_score = calculate_cosine_similarity(student_embedding, teacher_embedding)
-    return similarity_score
-
-
-
-
-# Assign badges based on the grade
-def assign_badge(grade):
-    if grade == 5:
-        return "Gold Badge 🌟"
-    elif grade == 4:
-        return "Silver Badge 🥈"
-    elif grade == 3:
-        return "Bronze Badge 🥉"
-    else:
-        return "Keep Improving Badge 💪"
-
-
-# Categorize feedback into clarity, completeness, and accuracy
-def detailed_feedback(similarity_score):
-    if similarity_score >= 0.9:
-        return {"Clarity": "Excellent", "Completeness": "Complete", "Accuracy": "Accurate"}
-    elif similarity_score >= 0.8:
-        return {"Clarity": "Good", "Completeness": "Almost Complete", "Accuracy": "Mostly Accurate"}
-    elif similarity_score >= 0.7:
-        return {"Clarity": "Fair", "Completeness": "Partial", "Accuracy": "Some Errors"}
-    else:
-        return {"Clarity": "Needs Improvement", "Completeness": "Incomplete", "Accuracy": "Inaccurate"}
-
-# Assign grades based on similarity score
-def get_grade(similarity_score):
-    if similarity_score >= 0.9:
-        return 5
-    elif similarity_score >= 0.8:
-        return 4
-    elif similarity_score >= 0.7:
-        return 3
-    elif similarity_score >= 0.6:
-        return 2
-    else:
-        return 1
-
-
+    bert_similarity = calculate_cosine_similarity(get_bert_embedding(student_answer), get_bert_embedding(teacher_answer))
+    sentence_similarity = calculate_sentence_similarity(student_answer, teacher_answer)
+    # Combine scores with weights to emphasize one method over another if needed
+    final_similarity = (0.5 * bert_similarity + 0.5 * sentence_similarity)
+    return final_similarity
+
+# Function to extract keywords from the model answer (simple keyword extraction)
+def extract_keywords(text):
+    # Split text into words and count them
+    return set(text.lower().split())
+
+# Adjust grading based on key terms present in student answer
+def check_keywords(student_answer, model_answer):
+    student_keywords = extract_keywords(student_answer)
+    teacher_keywords = extract_keywords(model_answer)
+    keyword_overlap = len(student_keywords.intersection(teacher_keywords))
+    return keyword_overlap / len(teacher_keywords)  # Ratio of matching keywords
 
 # Function to evaluate student's answer by comparing it to a model answer
 def evaluate_answer(image, languages, model_answer):
     student_answer = extract_text_from_image(image, languages)
-    similarity_score = compare_answers(student_answer, model_answer)
-    grade = get_grade(similarity_score)
+    semantic_similarity = compare_answers(student_answer, model_answer)
+    keyword_similarity = check_keywords(student_answer, model_answer)
+
+    # Combine semantic similarity with keyword presence check
+    combined_similarity = (semantic_similarity + keyword_similarity) / 2
+    grade = get_grade(combined_similarity)
     feedback = f"Student's answer: {student_answer}\nTeacher's answer: {model_answer}"
-    # visual_feedback = generate_sequence_feedback(student_answer, model_answer)
     badge = assign_badge(grade)
-    detailed_feedback_msg = detailed_feedback(similarity_score)
+    detailed_feedback_msg = detailed_feedback(combined_similarity)
     prompt = f"The student got grade: {grade} when the student's answer is: {student_answer} and the teacher's answer is: {model_answer}. Justify the grade given to the student."
-    return grade, similarity_score * 100, feedback, badge, detailed_feedback_msg, prompt
+    return grade, combined_similarity * 100, feedback, badge, detailed_feedback_msg, prompt
+
+# The rest of your existing code...
+
 
 # Main interface function for Gradio
 async def gradio_interface(image, languages: List[str], model_answer="The process of photosynthesis helps plants produce glucose using sunlight.", prompt="", history=[]):
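
Note: the updated evaluate_answer still calls extract_text_from_image, get_grade, assign_badge, and detailed_feedback, yet this commit deletes all four definitions; the placeholder "# The rest of your existing code..." suggests they were meant to survive. A minimal sketch of what must stay defined in app.py for the new code to run, condensed from the removed lines above (it relies on the pytesseract, PIL, and typing imports already at the top of the file):

# Helpers still referenced by the new evaluate_answer; bodies condensed
# from the definitions this commit removes.
def extract_text_from_image(filepath: str, languages: List[str]):
    image = Image.open(filepath)
    lang_str = '+'.join(languages)  # Tesseract expects e.g. 'eng+hin'
    return pytesseract.image_to_string(image=image, lang=lang_str)

def get_grade(similarity_score):
    # Map a 0-1 similarity onto a 1-5 grade
    for threshold, grade in ((0.9, 5), (0.8, 4), (0.7, 3), (0.6, 2)):
        if similarity_score >= threshold:
            return grade
    return 1

def assign_badge(grade):
    # Badge per grade, defaulting to an encouragement badge
    badges = {5: "Gold Badge 🌟", 4: "Silver Badge 🥈", 3: "Bronze Badge 🥉"}
    return badges.get(grade, "Keep Improving Badge 💪")

def detailed_feedback(similarity_score):
    # Categorize feedback into clarity, completeness, and accuracy
    if similarity_score >= 0.9:
        return {"Clarity": "Excellent", "Completeness": "Complete", "Accuracy": "Accurate"}
    elif similarity_score >= 0.8:
        return {"Clarity": "Good", "Completeness": "Almost Complete", "Accuracy": "Mostly Accurate"}
    elif similarity_score >= 0.7:
        return {"Clarity": "Fair", "Completeness": "Partial", "Accuracy": "Some Errors"}
    return {"Clarity": "Needs Improvement", "Completeness": "Incomplete", "Accuracy": "Inaccurate"}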
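
As a quick smoke test of the new scoring path (hypothetical strings; assumes the helpers above are restored and the models at the top of app.py are loaded):

student = "Plants use sunlight to make glucose during photosynthesis."
teacher = "The process of photosynthesis helps plants produce glucose using sunlight."

semantic = compare_answers(student, teacher)   # 0.5 * BERT cosine + 0.5 * MiniLM cosine
keywords = check_keywords(student, teacher)    # share of teacher's words found verbatim
combined = (semantic + keywords) / 2           # same blend evaluate_answer uses
grade = get_grade(combined)
print(grade, assign_badge(grade), detailed_feedback(combined))

Note that check_keywords matches surface forms only, so a paraphrased answer lowers the keyword score even when its semantic similarity is high.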