Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,11 @@ from tabulate import tabulate
|
|
| 12 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 13 |
from multiprocessing import Pool, cpu_count
|
| 14 |
from flask import Flask, request, jsonify
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# Paths for saving artifacts
|
| 17 |
MODEL_DIR = "./saved_models"
|
|
@@ -22,7 +27,16 @@ SKILL_TFIDF_PATH = os.path.join(MODEL_DIR, "skill_tfidf.pkl")
|
|
| 22 |
QUESTION_ANSWER_PATH = os.path.join(MODEL_DIR, "question_to_answer.pkl")
|
| 23 |
FAISS_INDEX_PATH = os.path.join(MODEL_DIR, "faiss_index.index")
|
| 24 |
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# Load Datasets
|
| 28 |
def load_dataset(file_path, required_columns=[]):
|
|
@@ -84,7 +98,7 @@ def initialize_resources(user_skills):
|
|
| 84 |
answer_embeddings = universal_model.encode(list(question_to_answer.values()), convert_to_tensor=True, show_progress_bar=False).cpu().numpy()
|
| 85 |
|
| 86 |
if not resources_valid(skill_tfidf.keys(), [s.lower() for s in user_skills]):
|
| 87 |
-
|
| 88 |
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
|
| 89 |
all_texts = user_skills + questions_df['Answer'].fillna("").tolist() + questions_df['Question'].tolist()
|
| 90 |
tfidf_vectorizer.fit(all_texts)
|
|
@@ -113,7 +127,7 @@ def initialize_resources(user_skills):
|
|
| 113 |
universal_model.save_pretrained(UNIVERSAL_MODEL_PATH)
|
| 114 |
detector_model.save_pretrained(DETECTOR_MODEL_PATH)
|
| 115 |
detector_tokenizer.save_pretrained(DETECTOR_MODEL_PATH)
|
| 116 |
-
|
| 117 |
|
| 118 |
# Evaluate Responses
|
| 119 |
def evaluate_response(args):
|
|
|
|
| 12 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 13 |
from multiprocessing import Pool, cpu_count
|
| 14 |
from flask import Flask, request, jsonify
|
| 15 |
+
import logging
|
| 16 |
+
|
| 17 |
+
# Set up logging
|
| 18 |
+
logging.basicConfig(level=logging.INFO)
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
|
| 21 |
# Paths for saving artifacts
|
| 22 |
MODEL_DIR = "./saved_models"
|
|
|
|
| 27 |
QUESTION_ANSWER_PATH = os.path.join(MODEL_DIR, "question_to_answer.pkl")
|
| 28 |
FAISS_INDEX_PATH = os.path.join(MODEL_DIR, "faiss_index.index")
|
| 29 |
|
| 30 |
+
# Ensure the directory exists with error handling
|
| 31 |
+
try:
|
| 32 |
+
os.makedirs(MODEL_DIR, exist_ok=True)
|
| 33 |
+
logger.info(f"Successfully created/accessed directory: {MODEL_DIR}")
|
| 34 |
+
except PermissionError as e:
|
| 35 |
+
logger.error(f"Permission denied creating directory {MODEL_DIR}: {e}")
|
| 36 |
+
raise
|
| 37 |
+
except Exception as e:
|
| 38 |
+
logger.error(f"Unexpected error creating directory {MODEL_DIR}: {e}")
|
| 39 |
+
raise
|
| 40 |
|
| 41 |
# Load Datasets
|
| 42 |
def load_dataset(file_path, required_columns=[]):
|
|
|
|
| 98 |
answer_embeddings = universal_model.encode(list(question_to_answer.values()), convert_to_tensor=True, show_progress_bar=False).cpu().numpy()
|
| 99 |
|
| 100 |
if not resources_valid(skill_tfidf.keys(), [s.lower() for s in user_skills]):
|
| 101 |
+
logger.info("⚠ Saved skill TF-IDF mismatch detected. Recomputing resources.")
|
| 102 |
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
|
| 103 |
all_texts = user_skills + questions_df['Answer'].fillna("").tolist() + questions_df['Question'].tolist()
|
| 104 |
tfidf_vectorizer.fit(all_texts)
|
|
|
|
| 127 |
universal_model.save_pretrained(UNIVERSAL_MODEL_PATH)
|
| 128 |
detector_model.save_pretrained(DETECTOR_MODEL_PATH)
|
| 129 |
detector_tokenizer.save_pretrained(DETECTOR_MODEL_PATH)
|
| 130 |
+
logger.info(f"Models and resources saved to {MODEL_DIR}")
|
| 131 |
|
| 132 |
# Evaluate Responses
|
| 133 |
def evaluate_response(args):
|