Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -34,6 +34,7 @@ import calendar
|
|
34 |
from dateutil.relativedelta import relativedelta
|
35 |
import numpy as np
|
36 |
import matplotlib.pyplot as plt
|
|
|
37 |
|
38 |
# Enhanced Configuration
|
39 |
PROFILES_DIR = "student_profiles"
|
@@ -62,6 +63,23 @@ logger = logging.getLogger(__name__)
|
|
62 |
# Model configuration
|
63 |
MODEL_NAME = "deepseek-ai/deepseek-llm-7b"
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
# Initialize Hugging Face API
|
66 |
if HF_TOKEN:
|
67 |
hf_api = None
|
@@ -130,6 +148,13 @@ def validate_file(file_obj) -> None:
|
|
130 |
if file_size > MAX_FILE_SIZE_MB:
|
131 |
raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
|
132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
def remove_sensitive_info(text: str) -> str:
|
134 |
patterns = [
|
135 |
(r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED-SSN]'),
|
@@ -316,20 +341,27 @@ class MiamiDadeTranscriptParser:
|
|
316 |
def parse_transcript(self, file_path: str) -> Dict:
|
317 |
"""Parse Miami-Dade transcript PDF with multiple extraction methods"""
|
318 |
try:
|
319 |
-
# First try pdfplumber
|
320 |
text = ""
|
321 |
with pdfplumber.open(file_path) as pdf:
|
322 |
-
|
323 |
-
|
|
|
|
|
324 |
|
325 |
# Fallback to PyMuPDF if text extraction is poor
|
326 |
if len(text) < 500:
|
|
|
327 |
doc = fitz.open(file_path)
|
328 |
text = ""
|
329 |
for page in doc:
|
330 |
text += page.get_text()
|
331 |
|
332 |
return self._parse_miami_dade_format(text)
|
|
|
|
|
|
|
|
|
333 |
except Exception as e:
|
334 |
logger.error(f"Error parsing transcript: {str(e)}")
|
335 |
raise ValueError(f"Error processing transcript: {str(e)}")
|
@@ -636,7 +668,7 @@ class AcademicAnalyzer:
|
|
636 |
'remaining': max(0, info.get('required', 0) - info.get('earned', 0))
|
637 |
}
|
638 |
for subject, info in credits.items()
|
639 |
-
if info and info.get('required', 0) > info.get('earned', 0)
|
640 |
]
|
641 |
|
642 |
current_grade = parsed_data.get('student_info', {}).get('grade', '')
|
@@ -912,7 +944,7 @@ class AcademicAnalyzer:
|
|
912 |
study_blocks = 3
|
913 |
plan['study_strategies'].extend([
|
914 |
"Create physical models or demonstrations",
|
915 |
-
"Study while walking or
|
916 |
"Use hands-on activities when possible"
|
917 |
])
|
918 |
|
@@ -1809,6 +1841,238 @@ class EnhancedTeachingAssistant:
|
|
1809 |
# Initialize teaching assistant
|
1810 |
teaching_assistant = EnhancedTeachingAssistant()
|
1811 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1812 |
def create_enhanced_interface():
|
1813 |
with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
|
1814 |
session_token = gr.State(value=generate_session_token())
|
@@ -2532,7 +2796,7 @@ def create_enhanced_interface():
|
|
2532 |
outputs=None
|
2533 |
)
|
2534 |
|
2535 |
-
app.load(fn=lambda:
|
2536 |
|
2537 |
return app
|
2538 |
|
|
|
34 |
from dateutil.relativedelta import relativedelta
|
35 |
import numpy as np
|
36 |
import matplotlib.pyplot as plt
|
37 |
+
from tqdm import tqdm
|
38 |
|
39 |
# Enhanced Configuration
|
40 |
PROFILES_DIR = "student_profiles"
|
|
|
63 |
# Model configuration
|
64 |
MODEL_NAME = "deepseek-ai/deepseek-llm-7b"
|
65 |
|
66 |
+
@lru_cache(maxsize=1)
def get_model_and_tokenizer():
    """Load the causal language model named by MODEL_NAME and its tokenizer.

    The function is wrapped in ``lru_cache(maxsize=1)``, so a successful load
    is performed once and later calls reuse the same pair.  A failed load is
    logged and re-raised unchanged.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    logger.info("Loading model and tokenizer...")
    try:
        tok = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Half-precision weights; placement is delegated to device_map="auto".
        load_options = dict(torch_dtype=torch.float16, device_map="auto")
        lm = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_options)
        logger.info("Model and tokenizer loaded successfully")
    except Exception as exc:
        logger.error(f"Failed to load model: {str(exc)}")
        raise
    return lm, tok
|
82 |
+
|
83 |
# Initialize Hugging Face API
|
84 |
if HF_TOKEN:
|
85 |
hf_api = None
|
|
|
148 |
if file_size > MAX_FILE_SIZE_MB:
|
149 |
raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
|
150 |
|
151 |
+
def validate_date(date_str: str) -> bool:
    """Return True when *date_str* parses as a date in ``%Y-%m-%d`` form.

    Args:
        date_str: Candidate date text, e.g. ``"2024-05-31"``.

    Returns:
        True if ``datetime.datetime.strptime`` accepts the value, else False.
        Non-string input such as ``None`` is reported as invalid instead of
        raising, so the function always honours its boolean contract.
    """
    try:
        datetime.datetime.strptime(date_str, '%Y-%m-%d')
    except (ValueError, TypeError):
        # ValueError: wrong format or impossible date; TypeError: not a string.
        return False
    return True
|
157 |
+
|
158 |
def remove_sensitive_info(text: str) -> str:
|
159 |
patterns = [
|
160 |
(r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED-SSN]'),
|
|
|
341 |
def parse_transcript(self, file_path: str) -> Dict:
    """Parse Miami-Dade transcript PDF with multiple extraction methods.

    Text is extracted page by page with pdfplumber first.  When fewer than
    500 characters come back, the file is re-read with PyMuPDF (``fitz``)
    instead.  The resulting text is handed to ``_parse_miami_dade_format``.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        Whatever ``self._parse_miami_dade_format`` returns for the text.

    Raises:
        ValueError: For a syntactically invalid PDF, or for any other
            failure during extraction or parsing (original error chained).
    """
    try:
        # First try pdfplumber with progress bar
        page_texts = []
        with pdfplumber.open(file_path) as pdf:
            with tqdm(total=len(pdf.pages), desc="Processing transcript") as pbar:
                for page in pdf.pages:
                    # extract_text() may give None for a page without a text
                    # layer; treat it as empty so the fallback below can run.
                    page_texts.append((page.extract_text() or "") + "\n")
                    pbar.update(1)
        text = "".join(page_texts)

        # Fallback to PyMuPDF if text extraction is poor
        if len(text) < 500:
            logger.warning("Low text extraction with pdfplumber, trying PyMuPDF")
            # The context manager closes the document even if a page fails.
            with fitz.open(file_path) as doc:
                text = "".join(page.get_text() for page in doc)

        return self._parse_miami_dade_format(text)
    # NOTE(review): pdfplumber does not export PDFSyntaxError at top level;
    # the class is reached through the pdfminer package it re-exports.
    # Confirm against the installed pdfplumber/pdfminer.six versions.
    except pdfplumber.pdfminer.pdfparser.PDFSyntaxError as e:
        error_msg = "Invalid PDF file. Please ensure you're uploading a valid transcript PDF."
        logger.error(f"{error_msg}: {str(e)}")
        raise ValueError(f"{error_msg} If the problem persists, try converting the file to a different format.") from e
    except Exception as e:
        logger.error(f"Error parsing transcript: {str(e)}")
        raise ValueError(f"Error processing transcript: {str(e)}") from e
|
|
|
668 |
'remaining': max(0, info.get('required', 0) - info.get('earned', 0))
|
669 |
}
|
670 |
for subject, info in credits.items()
|
671 |
+
if info and info.get('required', 0) > info.get('earned', 0)
|
672 |
]
|
673 |
|
674 |
current_grade = parsed_data.get('student_info', {}).get('grade', '')
|
|
|
944 |
study_blocks = 3
|
945 |
plan['study_strategies'].extend([
|
946 |
"Create physical models or demonstrations",
|
947 |
+
"Study while walking or pacing",
|
948 |
"Use hands-on activities when possible"
|
949 |
])
|
950 |
|
|
|
1841 |
# Initialize teaching assistant
|
1842 |
teaching_assistant = EnhancedTeachingAssistant()
|
1843 |
|
1844 |
+
class StudyCalendar:
    """Builds a 30-day study schedule and renders it as a Plotly timeline."""

    def __init__(self):
        # Weekday name -> list of (start, end) study blocks in HH:MM form.
        self.default_study_blocks = {
            'Monday': [('16:00', '17:30'), ('19:00', '20:30')],
            'Tuesday': [('16:00', '17:30')],
            'Wednesday': [('16:00', '17:30'), ('19:00', '20:30')],
            'Thursday': [('16:00', '17:30')],
            'Friday': [('15:00', '16:30')],
            'Saturday': [('10:00', '12:00')],
            'Sunday': [('14:00', '16:00')]
        }

    def generate_study_calendar(self, profile: Dict, start_date: str) -> Dict:
        """Generate a study calendar based on the student's profile.

        Args:
            profile: Student profile; ``profile['transcript']['course_history']``
                is scanned for courses whose title contains "exam".
            start_date: First day of the schedule, formatted ``YYYY-MM-DD``.

        Returns:
            A dict with ``start_date``, ``end_date`` (30 days later),
            ``events`` (one entry per default study block per day, both end
            days included) and ``exams``.  If building the entries fails, the
            same dict is returned with empty ``events`` and ``exams``.

        Raises:
            ValueError: If ``start_date`` is not in ``YYYY-MM-DD`` form.
        """
        # Parsed before the try block: the fallback result needs end_date, so
        # a bad start date must surface here instead of inside the handler.
        first_day = datetime.datetime.strptime(start_date, '%Y-%m-%d')
        last_day = first_day + datetime.timedelta(days=30)
        # Named "schedule" so the file-level ``calendar`` module is not shadowed.
        schedule = {
            'start_date': start_date,
            'end_date': last_day.strftime('%Y-%m-%d'),
            'events': [],
            'exams': []
        }

        try:
            # Add regular study sessions
            current_date = first_day
            while current_date <= last_day:
                day_name = current_date.strftime('%A')
                for block_start, block_end in self.default_study_blocks.get(day_name, []):
                    schedule['events'].append({
                        'date': current_date.strftime('%Y-%m-%d'),
                        'title': 'Study Session',
                        'description': 'Focused study time',
                        'start_time': block_start,
                        'end_time': block_end,
                        'duration': f"{block_start} to {block_end}"
                    })
                current_date += datetime.timedelta(days=1)

            # Add exams from transcript if available
            transcript = profile.get('transcript') or {}
            for course in transcript.get('course_history') or []:
                title = course.get('course_title') or ''
                if 'exam' not in title.lower():
                    continue
                # NOTE(review): the exam day is a random placeholder 7-28 days
                # after the start date, not a date taken from the transcript.
                exam_day = first_day + datetime.timedelta(days=random.randint(7, 28))
                schedule['exams'].append({
                    'date': exam_day.strftime('%Y-%m-%d'),
                    'title': title,
                    'description': 'Prepare by reviewing materials',
                    'duration': 'All day'
                })

            return schedule
        except Exception as e:
            logger.error(f"Error generating study calendar: {str(e)}")
            return {**schedule, 'events': [], 'exams': []}

    def create_calendar_visualization(self, calendar: Dict):
        """Create a visualization of the study calendar.

        Args:
            calendar: A dict shaped like the result of
                ``generate_study_calendar`` (``events`` and ``exams`` lists).

        Returns:
            A Plotly figure with one row per date: study blocks in green and
            exams as all-day red bars.  ``None`` when there is nothing to plot
            or when building the figure fails (the error is logged).
        """
        try:
            events = calendar.get('events') or []
            exams = calendar.get('exams') or []
            if not events and not exams:
                return None

            def build_timeline(frame, colour):
                return px.timeline(
                    frame,
                    x_start="start_time",
                    x_end="end_time",
                    y="date",
                    color_discrete_sequence=[colour],
                    title="Study Schedule"
                )

            fig = build_timeline(pd.DataFrame(events), '#4CAF50') if events else None

            if exams:
                # Exams carry no times; draw them across the whole day.
                exams_df = pd.DataFrame(exams).assign(start_time='00:00', end_time='23:59')
                exam_fig = build_timeline(exams_df, '#F44336')
                if fig is None:
                    # Only exams exist: an empty events frame cannot seed the figure.
                    fig = exam_fig
                else:
                    fig.add_trace(exam_fig.data[0])

            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12),
                showlegend=False
            )

            return fig
        except Exception as e:
            logger.error(f"Error creating calendar visualization: {str(e)}")
            return None
|
1946 |
+
|
1947 |
+
# Initialize study calendar
|
1948 |
+
study_calendar = StudyCalendar()
|
1949 |
+
|
1950 |
+
class GoalTracker:
    """Stores per-student goals and their progress history in a JSON file.

    The file maps each student name to a list of goal dicts; every goal keeps
    a ``progress`` list of dated entries.
    """

    def __init__(self):
        self.goals_file = Path("student_goals.json")
        # Create the file up front so later reads never hit a missing path.
        self.goals_file.touch(exist_ok=True)

    def add_goal(self, student_name: str, goal_type: str, description: str,
                 target_date: str, target_value: Optional[float] = None) -> bool:
        """Add a new goal for the student.

        Args:
            student_name: Key under which the goal is stored.
            goal_type: Free-form category label saved as ``type``.
            description: Human-readable goal text.
            target_date: Deadline in ``YYYY-MM-DD`` form (checked with
                ``validate_date``).
            target_value: Optional numeric target.

        Returns:
            True when the goal was saved; False on an invalid date or any
            other failure (the error is logged).
        """
        try:
            if not validate_date(target_date):
                raise ValueError("Invalid target date format. Please use YYYY-MM-DD")

            goals = self._load_goals()
            student_goals = goals.get(student_name, [])

            student_goals.append({
                # Ids are 1-based positions within the student's goal list.
                'id': str(len(student_goals) + 1),
                'type': goal_type,
                'description': description,
                'target_date': target_date,
                'target_value': target_value,
                'created_at': datetime.datetime.now().isoformat(),
                'progress': []
            })
            goals[student_name] = student_goals

            self._save_goals(goals)
            return True
        except Exception as e:
            logger.error(f"Error adding goal: {str(e)}")
            return False

    def update_goal_progress(self, student_name: str, goal_id: str,
                             progress_value: float, notes: str = "") -> bool:
        """Update progress for a specific goal.

        Args:
            student_name: Student whose goal is updated.
            goal_id: Id of the goal; compared as text, so ``1`` matches ``"1"``.
            progress_value: New progress reading to append.
            notes: Optional free-form remark stored with the reading.

        Returns:
            True when a matching goal was found and the entry saved; False
            when the student or goal does not exist, or on any failure.
        """
        try:
            goals = self._load_goals()
            wanted = str(goal_id)
            goal = next(
                (g for g in goals.get(student_name, []) if str(g.get('id')) == wanted),
                None
            )
            if goal is None:
                # Nothing matched: report it instead of claiming success.
                return False

            goal.setdefault('progress', []).append({
                'date': datetime.datetime.now().isoformat(),
                'value': progress_value,
                'notes': notes
            })

            self._save_goals(goals)
            return True
        except Exception as e:
            logger.error(f"Error updating goal progress: {str(e)}")
            return False

    def get_goals(self, student_name: str) -> List[Dict]:
        """Get all goals for a student (an empty list when there are none)."""
        try:
            return self._load_goals().get(student_name, [])
        except Exception as e:
            logger.error(f"Error getting goals: {str(e)}")
            return []

    def create_goal_visualization(self, goals: List[Dict]):
        """Create a visualization of goal progress.

        Only goals with at least one progress entry are plotted, using the
        most recent entry.  A goal without a numeric target is shown against
        a target of 100.

        Returns:
            A grouped Plotly bar chart, or ``None`` when there is nothing to
            plot or building the chart fails (the error is logged).
        """
        try:
            if not goals:
                return None

            progress_data = []
            for goal in goals:
                history = goal.get('progress')
                if not history:
                    continue
                # The key is always stored, possibly as None, so a plain
                # .get(..., 100) default would never apply.
                target = goal.get('target_value')
                progress_data.append({
                    'Goal': goal['description'],
                    'Progress': history[-1]['value'],
                    'Target': 100 if target is None else target,
                    'Type': goal['type']
                })

            if not progress_data:
                return None

            df = pd.DataFrame(progress_data)

            fig = px.bar(
                df,
                x='Goal',
                y=['Progress', 'Target'],
                barmode='group',
                title="Goal Progress",
                color_discrete_map={
                    'Progress': '#4CAF50',
                    'Target': '#2196F3'
                }
            )

            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12)
            )

            return fig
        except Exception as e:
            logger.error(f"Error creating goal visualization: {str(e)}")
            return None

    def _load_goals(self) -> Dict:
        """Load all goals from the file.

        Returns an empty dict when the file is missing, empty, not valid
        JSON, or holds something other than a JSON object.
        """
        try:
            with open(self.goals_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (json.JSONDecodeError, FileNotFoundError):
            return {}
        return data if isinstance(data, dict) else {}

    def _save_goals(self, goals: Dict) -> None:
        """Write the complete goals mapping back to the file."""
        with open(self.goals_file, 'w', encoding='utf-8') as f:
            json.dump(goals, f, indent=2)
|
2072 |
+
|
2073 |
+
# Initialize goal tracker
|
2074 |
+
goal_tracker = GoalTracker()
|
2075 |
+
|
2076 |
def create_enhanced_interface():
|
2077 |
with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
|
2078 |
session_token = gr.State(value=generate_session_token())
|
|
|
2796 |
outputs=None
|
2797 |
)
|
2798 |
|
2799 |
+
app.load(fn=lambda: get_model_and_tokenizer(), outputs=[])
|
2800 |
|
2801 |
return app
|
2802 |
|