Update app.py
app.py CHANGED
@@ -31,7 +31,11 @@ SESSION_TOKEN_LENGTH = 32
 HF_TOKEN = os.getenv("HF_TOKEN")
 
 # Initialize logging
-logging.basicConfig(
+logging.basicConfig(
+    filename='app.log',
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
 
 # Model configuration
 MODEL_CHOICES = {
@@ -43,8 +47,11 @@ DEFAULT_MODEL = "TinyLlama (Fastest)"
 
 # Initialize Hugging Face API
 if HF_TOKEN:
-
-
+    try:
+        hf_api = HfApi(token=HF_TOKEN)
+        HfFolder.save_token(HF_TOKEN)
+    except Exception as e:
+        logging.error(f"Failed to initialize Hugging Face API: {str(e)}")
 
 # ========== OPTIMIZED MODEL LOADING ==========
 class ModelLoader:
@@ -56,15 +63,17 @@ class ModelLoader:
         self.error = None
         self.current_model = None
 
-    def load_model(self, model_name, progress
+    def load_model(self, model_name: str, progress: gr.Progress = None) -> Tuple[Optional[AutoModelForCausalLM], Optional[AutoTokenizer]]:
         """Lazy load the model with progress feedback"""
         if self.loaded and self.current_model == model_name:
             return self.model, self.tokenizer
 
         self.loading = True
         self.error = None
+
         try:
-            progress
+            if progress:
+                progress(0.1, desc="Initializing...")
 
             # Clear previous model if any
             if self.model:
@@ -84,25 +93,29 @@ class ModelLoader:
             if "TinyLlama" in model_name:
                 model_kwargs["attn_implementation"] = "flash_attention_2"
 
-            progress
+            if progress:
+                progress(0.3, desc="Loading tokenizer...")
             self.tokenizer = AutoTokenizer.from_pretrained(
                 MODEL_CHOICES[model_name],
                 trust_remote_code=True
             )
 
-            progress
+            if progress:
+                progress(0.6, desc="Loading model...")
             self.model = AutoModelForCausalLM.from_pretrained(
                 MODEL_CHOICES[model_name],
                 **model_kwargs
            )
 
             # Verify model responsiveness
-            progress
+            if progress:
+                progress(0.8, desc="Verifying model...")
             test_input = self.tokenizer("Test", return_tensors="pt").to(self.model.device)
             _ = self.model.generate(**test_input, max_new_tokens=1)
 
             self.model.eval()  # Disable dropout
-            progress
+            if progress:
+                progress(0.9, desc="Finalizing...")
             self.loaded = True
             self.current_model = model_name
             return self.model, self.tokenizer
@@ -113,7 +126,7 @@ class ModelLoader:
             return None, None
         except Exception as e:
             self.error = str(e)
-            logging.error(f"Model loading error: {
+            logging.error(f"Model loading error: {str(e)}")
             return None, None
         finally:
             self.loading = False
@@ -394,10 +407,12 @@ def parse_transcript_with_ai(text: str, progress=gr.Progress()) -> Dict:
 
     # First try the structured parser
    try:
-        progress
+        if progress:
+            progress(0.1, desc="Parsing transcript structure...")
         parser = TranscriptParser()
         parsed_data = parser.parse_transcript(text)
-        progress
+        if progress:
+            progress(0.9, desc="Formatting results...")
 
         # Convert to expected format
         formatted_data = {
@@ -420,7 +435,8 @@ def parse_transcript_with_ai(text: str, progress=gr.Progress()) -> Dict:
                 "grade_level": course["grade_level"]
             })
 
-        progress
+        if progress:
+            progress(1.0)
         return validate_parsed_data(formatted_data)
 
     except Exception as e:
@@ -452,11 +468,13 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
     """
 
     try:
-        progress
+        if progress:
+            progress(0.1, desc="Processing transcript with AI...")
 
         # Tokenize and generate response
         inputs = model_loader.tokenizer(prompt, return_tensors="pt").to(model_loader.model.device)
-        progress
+        if progress:
+            progress(0.4)
 
         outputs = model_loader.model.generate(
             **inputs,
@@ -464,7 +482,8 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
             temperature=0.1,
             do_sample=True
         )
-        progress
+        if progress:
+            progress(0.8)
 
         # Decode the response
         response = model_loader.tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -478,7 +497,8 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
         json_str = re.search(r'\{.*\}', response, re.DOTALL).group()
         parsed_data = json.loads(json_str)
 
-        progress
+        if progress:
+            progress(1.0)
         return validate_parsed_data(parsed_data)
 
     except torch.cuda.OutOfMemoryError:
@@ -828,7 +848,7 @@ class ProfileManager:
             json.dump(data, f, indent=2, ensure_ascii=False)
 
         # Upload to HF Hub if token is available
-        if HF_TOKEN:
+        if HF_TOKEN and hf_api:
            try:
                 hf_api.upload_file(
                     path_or_fileobj=filepath,
@@ -867,7 +887,7 @@ class ProfileManager:
 
         if not profile_file.exists():
             # Try loading from HF Hub
-            if HF_TOKEN:
+            if HF_TOKEN and hf_api:
                 try:
                     hf_api.download_file(
                         path_in_repo=f"profiles/{profile_file.name}",
@@ -1379,7 +1399,7 @@ def create_interface():
         except Exception as e:
             logging.error(f"Upload error: {str(e)}")
             return (
-                "Error processing transcript
+                f"Error processing transcript: {str(e)}",
                 None,
                 current_tab_status,
                 gr.update(),