Dannyar608 committed on
Commit
1afdb58
·
verified ·
1 Parent(s): ba8e4ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +270 -6
app.py CHANGED
@@ -34,6 +34,7 @@ import calendar
34
  from dateutil.relativedelta import relativedelta
35
  import numpy as np
36
  import matplotlib.pyplot as plt
 
37
 
38
  # Enhanced Configuration
39
  PROFILES_DIR = "student_profiles"
@@ -62,6 +63,23 @@ logger = logging.getLogger(__name__)
62
  # Model configuration
63
  MODEL_NAME = "deepseek-ai/deepseek-llm-7b"
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  # Initialize Hugging Face API
66
  if HF_TOKEN:
67
  hf_api = None
@@ -130,6 +148,13 @@ def validate_file(file_obj) -> None:
130
  if file_size > MAX_FILE_SIZE_MB:
131
  raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
132
 
 
 
 
 
 
 
 
133
  def remove_sensitive_info(text: str) -> str:
134
  patterns = [
135
  (r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED-SSN]'),
@@ -316,20 +341,27 @@ class MiamiDadeTranscriptParser:
316
  def parse_transcript(self, file_path: str) -> Dict:
317
  """Parse Miami-Dade transcript PDF with multiple extraction methods"""
318
  try:
319
- # First try pdfplumber
320
  text = ""
321
  with pdfplumber.open(file_path) as pdf:
322
- for page in pdf.pages:
323
- text += page.extract_text() + "\n"
 
 
324
 
325
  # Fallback to PyMuPDF if text extraction is poor
326
  if len(text) < 500:
 
327
  doc = fitz.open(file_path)
328
  text = ""
329
  for page in doc:
330
  text += page.get_text()
331
 
332
  return self._parse_miami_dade_format(text)
 
 
 
 
333
  except Exception as e:
334
  logger.error(f"Error parsing transcript: {str(e)}")
335
  raise ValueError(f"Error processing transcript: {str(e)}")
@@ -636,7 +668,7 @@ class AcademicAnalyzer:
636
  'remaining': max(0, info.get('required', 0) - info.get('earned', 0))
637
  }
638
  for subject, info in credits.items()
639
- if info and info.get('required', 0) > info.get('earned', 0))
640
  ]
641
 
642
  current_grade = parsed_data.get('student_info', {}).get('grade', '')
@@ -912,7 +944,7 @@ class AcademicAnalyzer:
912
  study_blocks = 3
913
  plan['study_strategies'].extend([
914
  "Create physical models or demonstrations",
915
- "Study while walking or moving",
916
  "Use hands-on activities when possible"
917
  ])
918
 
@@ -1809,6 +1841,238 @@ class EnhancedTeachingAssistant:
1809
  # Initialize teaching assistant
1810
  teaching_assistant = EnhancedTeachingAssistant()
1811
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1812
  def create_enhanced_interface():
1813
  with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
1814
  session_token = gr.State(value=generate_session_token())
@@ -2532,7 +2796,7 @@ def create_enhanced_interface():
2532
  outputs=None
2533
  )
2534
 
2535
- app.load(fn=lambda: model_loader.load_model(), outputs=[])
2536
 
2537
  return app
2538
 
 
34
  from dateutil.relativedelta import relativedelta
35
  import numpy as np
36
  import matplotlib.pyplot as plt
37
+ from tqdm import tqdm
38
 
39
  # Enhanced Configuration
40
  PROFILES_DIR = "student_profiles"
 
63
  # Model configuration
64
  MODEL_NAME = "deepseek-ai/deepseek-llm-7b"
65
 
66
@lru_cache(maxsize=1)
def get_model_and_tokenizer():
    """Load the causal-LM model and tokenizer identified by MODEL_NAME.

    The function is wrapped in ``lru_cache(maxsize=1)``, so after one
    successful load later calls return the same ``(model, tokenizer)`` pair.
    A failed load is logged and re-raised; because exceptions are not cached,
    the next call attempts the load again.

    Returns:
        Tuple ``(model, tokenizer)``.

    Raises:
        Exception: whatever the underlying ``from_pretrained`` call raised.
    """
    logger.info("Loading model and tokenizer...")
    try:
        loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Half-precision weights, with device placement left to the library.
        load_options = {"torch_dtype": torch.float16, "device_map": "auto"}
        loaded_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_options)
        logger.info("Model and tokenizer loaded successfully")
        return loaded_model, loaded_tokenizer
    except Exception as load_error:
        logger.error(f"Failed to load model: {str(load_error)}")
        raise
82
+
83
  # Initialize Hugging Face API
84
  if HF_TOKEN:
85
  hf_api = None
 
148
  if file_size > MAX_FILE_SIZE_MB:
149
  raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
150
 
151
def validate_date(date_str: str) -> bool:
    """Return True if *date_str* parses as a date in YYYY-MM-DD form.

    Args:
        date_str: Candidate date text.

    Returns:
        True when ``datetime.strptime`` accepts the value with the
        ``%Y-%m-%d`` format; False for malformed text, impossible dates
        (for example February 30th) and non-string input such as None.
    """
    try:
        datetime.datetime.strptime(date_str, '%Y-%m-%d')
    except (ValueError, TypeError):
        # strptime raises TypeError (not ValueError) for None / non-string
        # input; a validator should report that as "invalid", not crash.
        return False
    return True
157
+
158
  def remove_sensitive_info(text: str) -> str:
159
  patterns = [
160
  (r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED-SSN]'),
 
341
  def parse_transcript(self, file_path: str) -> Dict:
342
  """Parse Miami-Dade transcript PDF with multiple extraction methods"""
343
  try:
344
+ # First try pdfplumber with progress bar
345
  text = ""
346
  with pdfplumber.open(file_path) as pdf:
347
+ with tqdm(total=len(pdf.pages), desc="Processing transcript") as pbar:
348
+ for page in pdf.pages:
349
+ text += page.extract_text() + "\n"
350
+ pbar.update(1)
351
 
352
  # Fallback to PyMuPDF if text extraction is poor
353
  if len(text) < 500:
354
+ logger.warning("Low text extraction with pdfplumber, trying PyMuPDF")
355
  doc = fitz.open(file_path)
356
  text = ""
357
  for page in doc:
358
  text += page.get_text()
359
 
360
  return self._parse_miami_dade_format(text)
361
+ except pdfplumber.PDFSyntaxError as e:
362
+ error_msg = "Invalid PDF file. Please ensure you're uploading a valid transcript PDF."
363
+ logger.error(f"{error_msg}: {str(e)}")
364
+ raise ValueError(f"{error_msg} If the problem persists, try converting the file to a different format.")
365
  except Exception as e:
366
  logger.error(f"Error parsing transcript: {str(e)}")
367
  raise ValueError(f"Error processing transcript: {str(e)}")
 
668
  'remaining': max(0, info.get('required', 0) - info.get('earned', 0))
669
  }
670
  for subject, info in credits.items()
671
+ if info and info.get('required', 0) > info.get('earned', 0)
672
  ]
673
 
674
  current_grade = parsed_data.get('student_info', {}).get('grade', '')
 
944
  study_blocks = 3
945
  plan['study_strategies'].extend([
946
  "Create physical models or demonstrations",
947
+ "Study while walking or pacing",
948
  "Use hands-on activities when possible"
949
  ])
950
 
 
1841
  # Initialize teaching assistant
1842
  teaching_assistant = EnhancedTeachingAssistant()
1843
 
1844
class StudyCalendar:
    """Builds a 30-day study schedule and renders it as a Plotly timeline."""

    # English weekday names indexed by ``datetime.weekday()`` (Monday == 0).
    # Used instead of ``strftime('%A')`` so the lookup into
    # ``default_study_blocks`` does not depend on the process locale.
    _WEEKDAY_NAMES = (
        'Monday', 'Tuesday', 'Wednesday', 'Thursday',
        'Friday', 'Saturday', 'Sunday',
    )

    def __init__(self):
        # Default (start, end) study blocks per weekday, as HH:MM strings.
        self.default_study_blocks = {
            'Monday': [('16:00', '17:30'), ('19:00', '20:30')],
            'Tuesday': [('16:00', '17:30')],
            'Wednesday': [('16:00', '17:30'), ('19:00', '20:30')],
            'Thursday': [('16:00', '17:30')],
            'Friday': [('15:00', '16:30')],
            'Saturday': [('10:00', '12:00')],
            'Sunday': [('14:00', '16:00')]
        }

    def generate_study_calendar(self, profile: Dict, start_date: str) -> Dict:
        """Generate a study calendar based on the student's profile.

        Args:
            profile: Student profile. Only ``profile['transcript']
                ['course_history']`` is read; each course whose
                ``course_title`` contains "exam" (case-insensitive) gets an
                exam entry on a random day 7-28 days after the start.
            start_date: First day of the calendar, ``YYYY-MM-DD``.

        Returns:
            Dict with ``start_date``, ``end_date`` (start + 30 days),
            ``events`` (one entry per default study block per day, both end
            days included) and ``exams``. If building the entries fails, the
            same dict with empty ``events`` and ``exams`` is returned.

        Raises:
            ValueError / TypeError: if ``start_date`` cannot be parsed; no
                calendar can be built without it.
        """
        # Parse once, up front. The fallback result needs the parsed date as
        # well, so a bad start date cannot be recovered from inside the
        # handler below.
        try:
            first_day = datetime.datetime.strptime(start_date, '%Y-%m-%d')
        except (ValueError, TypeError) as e:
            logger.error(f"Error generating study calendar: {str(e)}")
            raise
        end_date_text = (first_day + datetime.timedelta(days=30)).strftime('%Y-%m-%d')

        try:
            schedule = {
                'start_date': start_date,
                'end_date': end_date_text,
                'events': [],
                'exams': []
            }

            # Add regular study sessions (31 days: start and end inclusive)
            for offset in range(31):
                day = first_day + datetime.timedelta(days=offset)
                day_name = self._WEEKDAY_NAMES[day.weekday()]
                for block_start, block_end in self.default_study_blocks.get(day_name, []):
                    schedule['events'].append({
                        'date': day.strftime('%Y-%m-%d'),
                        'title': 'Study Session',
                        'description': 'Focused study time',
                        'start_time': block_start,
                        'end_time': block_end,
                        'duration': f"{block_start} to {block_end}"
                    })

            # Add exams from transcript if available. ``or`` guards against
            # keys that are present but None, which would otherwise raise
            # and throw away the study sessions built above.
            transcript = profile.get('transcript') or {}
            for course in transcript.get('course_history') or []:
                title = course.get('course_title') or ''
                if 'exam' in title.lower():
                    exam_day = first_day + datetime.timedelta(days=random.randint(7, 28))
                    schedule['exams'].append({
                        'date': exam_day.strftime('%Y-%m-%d'),
                        'title': title,
                        'description': 'Prepare by reviewing materials',
                        'duration': 'All day'
                    })

            return schedule
        except Exception as e:
            logger.error(f"Error generating study calendar: {str(e)}")
            return {
                'start_date': start_date,
                'end_date': end_date_text,
                'events': [],
                'exams': []
            }

    def create_calendar_visualization(self, calendar: Dict):
        """Create a visualization of the study calendar.

        Args:
            calendar: Dict as produced by ``generate_study_calendar``.

        Returns:
            A Plotly figure with study sessions (green) and, when present,
            exams (red); None when the calendar has no entries or when
            building the figure fails (the error is logged).
        """
        try:
            if not calendar.get('events') and not calendar.get('exams'):
                return None

            events_df = pd.DataFrame(calendar['events'])
            exams_df = pd.DataFrame(calendar['exams'])

            # NOTE(review): with exams but no events, events_df has no
            # columns, so this call presumably fails and None is returned
            # via the handler below - confirm whether that is intended.
            fig = px.timeline(
                events_df,
                x_start="start_time",
                x_end="end_time",
                y="date",
                color_discrete_sequence=['#4CAF50'],
                title="Study Schedule"
            )

            if not exams_df.empty:
                # Exams are drawn as all-day bars on the same time axis.
                fig.add_trace(px.timeline(
                    exams_df,
                    x_start=[datetime.time(0, 0).strftime('%H:%M')] * len(exams_df),
                    x_end=[datetime.time(23, 59).strftime('%H:%M')] * len(exams_df),
                    y="date",
                    color_discrete_sequence=['#F44336']
                ).data[0])

            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12),
                showlegend=False
            )

            return fig
        except Exception as e:
            logger.error(f"Error creating calendar visualization: {str(e)}")
            return None
1946
+
1947
# Initialize study calendar: one module-level StudyCalendar instance,
# created when this module is imported.
study_calendar = StudyCalendar()
1949
+
1950
class GoalTracker:
    """Stores per-student goals and their progress entries in a JSON file.

    The file maps each student name to a list of goal dicts; every public
    method re-reads the file, and mutating methods rewrite it in full.
    """

    def __init__(self):
        # Goals live in the current working directory; touch() makes sure the
        # file exists (an empty file is read back as "no goals").
        self.goals_file = Path("student_goals.json")
        self.goals_file.touch(exist_ok=True)

    def add_goal(self, student_name: str, goal_type: str, description: str,
                 target_date: str, target_value: Optional[float] = None) -> bool:
        """Add a new goal for the student.

        Args:
            student_name: Key under which the goal is stored.
            goal_type: Free-form goal category.
            description: Human-readable goal text.
            target_date: Due date, ``YYYY-MM-DD`` (checked with
                ``validate_date``).
            target_value: Optional numeric target.

        Returns:
            True if the goal was saved; False on an invalid date or any
            other error (the error is logged).
        """
        try:
            if not validate_date(target_date):
                raise ValueError("Invalid target date format. Please use YYYY-MM-DD")

            goals = self._load_goals()
            student_goals = goals.setdefault(student_name, [])

            student_goals.append({
                # Ids are 1-based positions within the student's goal list.
                'id': str(len(student_goals) + 1),
                'type': goal_type,
                'description': description,
                'target_date': target_date,
                'target_value': target_value,
                'created_at': datetime.datetime.now().isoformat(),
                'progress': []
            })

            self._save_goals(goals)
            return True
        except Exception as e:
            logger.error(f"Error adding goal: {str(e)}")
            return False

    def update_goal_progress(self, student_name: str, goal_id: str,
                             progress_value: float, notes: str = "") -> bool:
        """Update progress for a specific goal.

        Args:
            student_name: Student whose goal is updated.
            goal_id: Id of the goal (compared as a string, so an int id is
                accepted as well).
            progress_value: New progress measurement.
            notes: Optional free-form note stored with the entry.

        Returns:
            True if a matching goal was found and the entry was saved; False
            if the student or goal does not exist, or on any error.
        """
        try:
            goals = self._load_goals()
            wanted_id = str(goal_id)

            for goal in goals.get(student_name, []):
                if goal.get('id') == wanted_id:
                    goal.setdefault('progress', []).append({
                        'date': datetime.datetime.now().isoformat(),
                        'value': progress_value,
                        'notes': notes
                    })
                    self._save_goals(goals)
                    return True

            # No goal matched: report failure instead of claiming success
            # and rewriting an unchanged file.
            return False
        except Exception as e:
            logger.error(f"Error updating goal progress: {str(e)}")
            return False

    def get_goals(self, student_name: str) -> List[Dict]:
        """Get all goals for a student (empty list if none or on error)."""
        try:
            return self._load_goals().get(student_name, [])
        except Exception as e:
            logger.error(f"Error getting goals: {str(e)}")
            return []

    def create_goal_visualization(self, goals: List[Dict]):
        """Create a visualization of goal progress.

        Args:
            goals: Goal dicts as returned by ``get_goals``.

        Returns:
            A grouped Plotly bar chart comparing each goal's latest progress
            value with its target; None when no goal has progress yet or
            when building the chart fails (the error is logged).
        """
        try:
            if not goals:
                return None

            progress_data = []
            for goal in goals:
                if not goal.get('progress'):
                    continue
                target = goal.get('target_value')
                progress_data.append({
                    'Goal': goal['description'],
                    'Progress': goal['progress'][-1]['value'],
                    # add_goal always stores the key (possibly as None), so a
                    # .get() default would never apply; test for None instead.
                    'Target': 100 if target is None else target,
                    'Type': goal['type']
                })

            if not progress_data:
                return None

            df = pd.DataFrame(progress_data)

            fig = px.bar(
                df,
                x='Goal',
                y=['Progress', 'Target'],
                barmode='group',
                title="Goal Progress",
                color_discrete_map={
                    'Progress': '#4CAF50',
                    'Target': '#2196F3'
                }
            )

            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                paper_bgcolor='rgba(0,0,0,0)',
                font=dict(size=12)
            )

            return fig
        except Exception as e:
            logger.error(f"Error creating goal visualization: {str(e)}")
            return None

    def _load_goals(self) -> Dict:
        """Load all goals from the file; empty dict if missing or unreadable."""
        try:
            with open(self.goals_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (json.JSONDecodeError, FileNotFoundError):
            return {}
        # Anything other than a JSON object cannot hold per-student goals.
        return data if isinstance(data, dict) else {}

    def _save_goals(self, goals: Dict) -> None:
        """Write the complete goals mapping back to the file."""
        with open(self.goals_file, 'w', encoding='utf-8') as f:
            json.dump(goals, f, indent=2)
2072
+
2073
# Initialize goal tracker: one module-level GoalTracker instance, created
# when this module is imported (its constructor touches student_goals.json).
goal_tracker = GoalTracker()
2075
+
2076
  def create_enhanced_interface():
2077
  with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
2078
  session_token = gr.State(value=generate_session_token())
 
2796
  outputs=None
2797
  )
2798
 
2799
+ app.load(fn=lambda: get_model_and_tokenizer(), outputs=[])
2800
 
2801
  return app
2802