Update app.py

app.py CHANGED
@@ -31,7 +31,11 @@ SESSION_TOKEN_LENGTH = 32
 HF_TOKEN = os.getenv("HF_TOKEN")
 
 # Initialize logging
-logging.basicConfig(
+logging.basicConfig(
+    filename='app.log',
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
 
 # Model configuration
 MODEL_CHOICES = {
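For reference, a minimal standalone sketch of the file-based logging this hunk configures; the log messages below are illustrative and not taken from app.py:

import logging

# Same configuration as the hunk above: timestamped records appended to app.log.
logging.basicConfig(
    filename='app.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

logging.info("startup complete")                 # written to app.log, not stdout
logging.error("model loading error: %s", "oom")  # ERROR records land in the same file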
@@ -43,8 +47,11 @@ DEFAULT_MODEL = "TinyLlama (Fastest)"
 
 # Initialize Hugging Face API
 if HF_TOKEN:
-
-
+    try:
+        hf_api = HfApi(token=HF_TOKEN)
+        HfFolder.save_token(HF_TOKEN)
+    except Exception as e:
+        logging.error(f"Failed to initialize Hugging Face API: {str(e)}")
 
 # ========== OPTIMIZED MODEL LOADING ==========
 class ModelLoader:
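Pulled out of context, the guarded initialization looks like the sketch below, assuming huggingface_hub is installed. The explicit hf_api = None default is an assumption (the diff does not show it) but matches the later `if HF_TOKEN and hf_api:` checks:

import os
import logging
from huggingface_hub import HfApi, HfFolder

HF_TOKEN = os.getenv("HF_TOKEN")
hf_api = None  # assumed default so the later guards have something falsy to test

if HF_TOKEN:
    try:
        hf_api = HfApi(token=HF_TOKEN)
        HfFolder.save_token(HF_TOKEN)  # persist the token for other hub helpers
    except Exception as e:
        logging.error(f"Failed to initialize Hugging Face API: {str(e)}")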
@@ -56,15 +63,17 @@ class ModelLoader:
         self.error = None
         self.current_model = None
 
-    def load_model(self, model_name, progress
+    def load_model(self, model_name: str, progress: gr.Progress = None) -> Tuple[Optional[AutoModelForCausalLM], Optional[AutoTokenizer]]:
         """Lazy load the model with progress feedback"""
         if self.loaded and self.current_model == model_name:
             return self.model, self.tokenizer
 
         self.loading = True
         self.error = None
+
         try:
-            progress
+            if progress:
+                progress(0.1, desc="Initializing...")
 
             # Clear previous model if any
             if self.model:
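A minimal sketch of the `if progress:` guard pattern these hunks introduce, assuming Gradio is installed; slow_task and its steps are illustrative. The guard lets the same function run whether or not a gr.Progress tracker is passed down (the app's handlers declare progress=gr.Progress(), so they have one to forward):

import time
import gradio as gr

def slow_task(text: str, progress: gr.Progress = None) -> str:
    """Illustrative worker: reports progress only when a tracker is supplied."""
    if progress:
        progress(0.1, desc="Initializing...")
    time.sleep(0.5)  # stand-in for the real work
    if progress:
        progress(0.9, desc="Finalizing...")
    return text.upper()

print(slow_task("hello"))  # direct call (tests, scripts): the guards keep it silent
# Wired into a Gradio event, a gr.Progress tracker can be passed in and the
# progress(...) calls drive the progress bar in the UI.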
@@ -84,25 +93,29 @@ class ModelLoader:
             if "TinyLlama" in model_name:
                 model_kwargs["attn_implementation"] = "flash_attention_2"
 
-            progress
+            if progress:
+                progress(0.3, desc="Loading tokenizer...")
             self.tokenizer = AutoTokenizer.from_pretrained(
                 MODEL_CHOICES[model_name],
                 trust_remote_code=True
             )
 
-            progress
+            if progress:
+                progress(0.6, desc="Loading model...")
             self.model = AutoModelForCausalLM.from_pretrained(
                 MODEL_CHOICES[model_name],
                 **model_kwargs
             )
 
             # Verify model responsiveness
-            progress
+            if progress:
+                progress(0.8, desc="Verifying model...")
             test_input = self.tokenizer("Test", return_tensors="pt").to(self.model.device)
             _ = self.model.generate(**test_input, max_new_tokens=1)
 
             self.model.eval() # Disable dropout
-            progress
+            if progress:
+                progress(0.9, desc="Finalizing...")
             self.loaded = True
             self.current_model = model_name
             return self.model, self.tokenizer
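For context, a self-contained sketch of the load-and-verify sequence this hunk instruments; the checkpoint, dtype, and device handling are illustrative assumptions, not the app's MODEL_CHOICES or model_kwargs:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # illustrative checkpoint
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

# Same idea as the "Verify model responsiveness" step: a one-token generation
# surfaces broken weights or device placement before the loader reports success.
test_input = tokenizer("Test", return_tensors="pt").to(model.device)
_ = model.generate(**test_input, max_new_tokens=1)
model.eval()  # disable dropout for inference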
@@ -113,7 +126,7 @@ class ModelLoader:
             return None, None
         except Exception as e:
             self.error = str(e)
-            logging.error(f"Model loading error: {
+            logging.error(f"Model loading error: {str(e)}")
             return None, None
         finally:
             self.loading = False
@@ -394,10 +407,12 @@ def parse_transcript_with_ai(text: str, progress=gr.Progress()) -> Dict:
 
     # First try the structured parser
     try:
-        progress
+        if progress:
+            progress(0.1, desc="Parsing transcript structure...")
         parser = TranscriptParser()
         parsed_data = parser.parse_transcript(text)
-        progress
+        if progress:
+            progress(0.9, desc="Formatting results...")
 
         # Convert to expected format
         formatted_data = {
@@ -420,7 +435,8 @@ def parse_transcript_with_ai(text: str, progress=gr.Progress()) -> Dict:
                 "grade_level": course["grade_level"]
             })
 
-        progress
+        if progress:
+            progress(1.0)
         return validate_parsed_data(formatted_data)
 
     except Exception as e:
@@ -452,11 +468,13 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
     """
 
     try:
-        progress
+        if progress:
+            progress(0.1, desc="Processing transcript with AI...")
 
         # Tokenize and generate response
        inputs = model_loader.tokenizer(prompt, return_tensors="pt").to(model_loader.model.device)
-        progress
+        if progress:
+            progress(0.4)
 
         outputs = model_loader.model.generate(
             **inputs,
@@ -464,7 +482,8 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
             temperature=0.1,
             do_sample=True
         )
-        progress
+        if progress:
+            progress(0.8)
 
         # Decode the response
         response = model_loader.tokenizer.decode(outputs[0], skip_special_tokens=True)
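A hedged, self-contained sketch of the generate-and-decode step wrapped by these progress calls; the checkpoint and prompt are illustrative, and max_new_tokens is an assumed value because the diff does not show that argument:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # illustrative checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

prompt = 'Extract the GPA from "Cumulative GPA: 3.7" and answer in JSON.'
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs,
    max_new_tokens=128,  # assumed; the hunk elides this argument
    temperature=0.1,     # low temperature keeps sampling close to greedy
    do_sample=True,
)
# As in the diff, decode the full sequence (prompt plus continuation):
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)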
@@ -478,7 +497,8 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
         json_str = re.search(r'\{.*\}', response, re.DOTALL).group()
         parsed_data = json.loads(json_str)
 
-        progress
+        if progress:
+            progress(1.0)
         return validate_parsed_data(parsed_data)
 
     except torch.cuda.OutOfMemoryError:
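A small self-contained sketch of the extract-and-parse step, with an explicit check for a missing match (the diff calls .group() on the search result directly); the example response string is made up:

import json
import re

def extract_json_block(response: str) -> dict:
    """Pull the first {...} span out of a model response and parse it."""
    match = re.search(r'\{.*\}', response, re.DOTALL)
    if match is None:  # the diff assumes a match always exists
        raise ValueError("no JSON object found in model output")
    return json.loads(match.group())

# Made-up model output that wraps a JSON payload in prose:
response = 'Here is the transcript data: {"student": "A. Doe", "gpa": 3.7}'
print(extract_json_block(response))  # {'student': 'A. Doe', 'gpa': 3.7}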
@@ -828,7 +848,7 @@ class ProfileManager:
             json.dump(data, f, indent=2, ensure_ascii=False)
 
         # Upload to HF Hub if token is available
-        if HF_TOKEN:
+        if HF_TOKEN and hf_api:
             try:
                 hf_api.upload_file(
                     path_or_fileobj=filepath,
@@ -867,7 +887,7 @@ class ProfileManager:
 
         if not profile_file.exists():
             # Try loading from HF Hub
-            if HF_TOKEN:
+            if HF_TOKEN and hf_api:
                 try:
                     hf_api.download_file(
                         path_in_repo=f"profiles/{profile_file.name}",
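A hedged sketch of the guarded Hub upload the profile hunks rely on, assuming hf_api was initialized as in the earlier hunk and may be None; sync_profile, REPO_ID, and the repo type are placeholders rather than the app's real values:

import logging
import os
from huggingface_hub import HfApi

HF_TOKEN = os.getenv("HF_TOKEN")
hf_api = HfApi(token=HF_TOKEN) if HF_TOKEN else None
REPO_ID = "your-username/student-profiles"  # hypothetical dataset repo

def sync_profile(filepath: str, filename: str) -> None:
    """Upload a saved profile, skipping silently when the Hub is not configured."""
    if HF_TOKEN and hf_api:  # same guard as the diff
        try:
            hf_api.upload_file(
                path_or_fileobj=filepath,
                path_in_repo=f"profiles/{filename}",
                repo_id=REPO_ID,
                repo_type="dataset",
            )
        except Exception as e:
            logging.error(f"Hub upload failed: {str(e)}")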
@@ -1379,7 +1399,7 @@ def create_interface():
         except Exception as e:
             logging.error(f"Upload error: {str(e)}")
             return (
-                "Error processing transcript
+                f"Error processing transcript: {str(e)}",
                 None,
                 current_tab_status,
                 gr.update(),