Update app.py

app.py CHANGED
@@ -38,9 +38,19 @@ TFIDF_PATH = os.path.join(chosen_model_dir, "tfidf_vectorizer.pkl")
 SKILL_TFIDF_PATH = os.path.join(chosen_model_dir, "skill_tfidf.pkl")
 QUESTION_ANSWER_PATH = os.path.join(chosen_model_dir, "question_to_answer.pkl")
 FAISS_INDEX_PATH = os.path.join(chosen_model_dir, "faiss_index.index")
+ANSWER_EMBEDDINGS_PATH = os.path.join(chosen_model_dir, "answer_embeddings.pkl")
 COURSE_SIMILARITY_PATH = os.path.join(chosen_model_dir, "course_similarity.pkl")
 JOB_SIMILARITY_PATH = os.path.join(chosen_model_dir, "job_similarity.pkl")

+# Global variables for precomputed data
+tfidf_vectorizer = None
+skill_tfidf = None
+question_to_answer = None
+faiss_index = None
+answer_embeddings = None
+course_similarity = None
+job_similarity = None
+
 # Improved dataset loading with fallback
 def load_dataset(file_path, required_columns=[], fallback_data=None):
     try:

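The None-initialized globals added above set up a load-once cache: a loader fills them from disk only while they are still unset, so later requests reuse the in-memory copies. A minimal sketch of that pattern, reduced to a single artifact (the function name is hypothetical):

# Load-once cache sketch; get_answer_embeddings is an illustrative helper,
# not a function from this app.
import os
import pickle

answer_embeddings = None  # module-level cache, filled on first use

def get_answer_embeddings(path):
    global answer_embeddings
    if answer_embeddings is None and os.path.exists(path):
        with open(path, "rb") as f:
            answer_embeddings = pickle.load(f)
    return answer_embeddings
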
@@ -108,20 +118,13 @@ def load_universal_model():
             logger.info(f"Loading universal model from {UNIVERSAL_MODEL_PATH}")
             return SentenceTransformer(UNIVERSAL_MODEL_PATH)
         else:
-            logger.info(f"Loading universal model: all-MiniLM-L6-v2")
-            model = SentenceTransformer("all-MiniLM-L6-v2")
-            model.save(UNIVERSAL_MODEL_PATH)
-            return model
-    except Exception as e:
-        logger.error(f"Failed to load universal model all-MiniLM-L6-v2: {e}. Falling back to default.")
-        try:
-            logger.info(f"Loading fallback model: {default_model}")
+            logger.info(f"Loading universal model: {default_model}")
             model = SentenceTransformer(default_model)
             model.save(UNIVERSAL_MODEL_PATH)
             return model
-
-
-
+    except Exception as e:
+        logger.error(f"Failed to load universal model {default_model}: {e}. Exiting.")
+        exit(1)

 universal_model = load_universal_model()

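The rewritten loader collapses the old two-stage fallback into one path: reuse the locally saved copy if present, otherwise download the default model once and save it. The same cache-then-reload pattern in isolation (assumes the sentence-transformers package; the directory name is illustrative):

# Cache-then-reload pattern for a SentenceTransformer model.
import os
from sentence_transformers import SentenceTransformer

MODEL_DIR = "models/universal"  # hypothetical location

if os.path.exists(MODEL_DIR):
    model = SentenceTransformer(MODEL_DIR)           # fast: load the local copy
else:
    model = SentenceTransformer("all-MiniLM-L6-v2")  # first run: download
    model.save(MODEL_DIR)                            # persist for later restarts

One design note: exit(1) in the new except branch terminates the whole process, which in a long-running web app hides the traceback; re-raising would let the platform surface the failure.
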
@@ -132,23 +135,16 @@ else:
     detector_tokenizer = AutoTokenizer.from_pretrained("roberta-base-openai-detector")
     detector_model = AutoModelForSequenceClassification.from_pretrained("roberta-base-openai-detector")

-# Global variables for precomputed data
-tfidf_vectorizer = None
-skill_tfidf = None
-question_to_answer = None
-faiss_index = None
-course_similarity = None
-job_similarity = None
-
 # Load Precomputed Resources
 def load_precomputed_resources():
-    global tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, course_similarity, job_similarity
-    if all(os.path.exists(p) for p in [TFIDF_PATH, SKILL_TFIDF_PATH, QUESTION_ANSWER_PATH, FAISS_INDEX_PATH, COURSE_SIMILARITY_PATH, JOB_SIMILARITY_PATH]):
+    global tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, answer_embeddings, course_similarity, job_similarity
+    if all(os.path.exists(p) for p in [TFIDF_PATH, SKILL_TFIDF_PATH, QUESTION_ANSWER_PATH, FAISS_INDEX_PATH, ANSWER_EMBEDDINGS_PATH, COURSE_SIMILARITY_PATH, JOB_SIMILARITY_PATH]):
         try:
             with open(TFIDF_PATH, 'rb') as f: tfidf_vectorizer = pickle.load(f)
             with open(SKILL_TFIDF_PATH, 'rb') as f: skill_tfidf = pickle.load(f)
             with open(QUESTION_ANSWER_PATH, 'rb') as f: question_to_answer = pickle.load(f)
             faiss_index = faiss.read_index(FAISS_INDEX_PATH)
+            with open(ANSWER_EMBEDDINGS_PATH, 'rb') as f: answer_embeddings = pickle.load(f)
             with open(COURSE_SIMILARITY_PATH, 'rb') as f: course_similarity = pickle.load(f)
             with open(JOB_SIMILARITY_PATH, 'rb') as f: job_similarity = pickle.load(f)
             logger.info("Loaded precomputed resources successfully")

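load_precomputed_resources() now also restores the pickled answer embeddings alongside the FAISS index. For reference, a self-contained round-trip for the FAISS artifact (assumes faiss-cpu and numpy; the dimension matches all-MiniLM-L6-v2 output but is otherwise illustrative):

# Write/read round-trip for a FAISS index, as used above.
import faiss
import numpy as np

dim = 384  # embedding size of all-MiniLM-L6-v2
vectors = np.random.rand(100, dim).astype("float32")

index = faiss.IndexFlatL2(dim)   # exact L2 search
index.add(vectors)
faiss.write_index(index, "faiss_index.index")

index2 = faiss.read_index("faiss_index.index")
distances, ids = index2.search(vectors[:1], 5)  # 5 nearest neighbours of the first vector
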
@@ -160,7 +156,7 @@ def load_precomputed_resources():

 # Precompute Resources Offline (to be run separately)
 def precompute_resources():
-    global tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, course_similarity, job_similarity
+    global tfidf_vectorizer, skill_tfidf, question_to_answer, faiss_index, answer_embeddings, course_similarity, job_similarity
     logger.info("Precomputing resources offline")
     tfidf_vectorizer = TfidfVectorizer(stop_words='english')
     all_texts = questions_df['Answer'].tolist() + questions_df['Question'].tolist()

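The TF-IDF step at the top of precompute_resources() fits a single vocabulary over answers and questions combined. The same step in isolation (assumes scikit-learn; the texts are placeholders):

# Fitting one TF-IDF vocabulary over a combined text list.
from sklearn.feature_extraction.text import TfidfVectorizer

texts = ["what is sql", "sql is a query language"]  # stand-in for answers + questions
vectorizer = TfidfVectorizer(stop_words='english')
matrix = vectorizer.fit_transform(texts)  # sparse matrix, shape (n_texts, n_terms)
print(matrix.shape, len(vectorizer.vocabulary_))
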
@@ -189,6 +185,7 @@ def precompute_resources():
     with open(SKILL_TFIDF_PATH, 'wb') as f: pickle.dump(skill_tfidf, f)
     with open(QUESTION_ANSWER_PATH, 'wb') as f: pickle.dump(question_to_answer, f)
     faiss.write_index(faiss_index, FAISS_INDEX_PATH)
+    with open(ANSWER_EMBEDDINGS_PATH, 'wb') as f: pickle.dump(answer_embeddings, f)
     with open(COURSE_SIMILARITY_PATH, 'wb') as f: pickle.dump(course_similarity, f)
     with open(JOB_SIMILARITY_PATH, 'wb') as f: pickle.dump(job_similarity, f)
     universal_model.save(UNIVERSAL_MODEL_PATH)

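The dump block writes each artifact straight to its final path. A hedged variant, not from the original code, that makes each write atomic so a crash mid-dump cannot leave a truncated pickle for load_precomputed_resources() to trip over:

# Atomic pickle write: dump to a temp file, then rename into place.
import os
import pickle
import tempfile

def dump_atomic(obj, path):
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(path) or ".")
    with os.fdopen(fd, "wb") as f:
        pickle.dump(obj, f)
    os.replace(tmp, path)  # rename is atomic on POSIX filesystems
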
@@ -232,7 +229,7 @@ def recommend_courses(skills_to_improve, user_level, upgrade=False):
         return []

     similarities = course_similarity[skill_indices]
-    total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * courses_df
+    total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * courses_df['popularity'].values + 0.2 * courses_df['completion_rate'].values

     target_level = 'Advanced' if upgrade else user_level
     idx = np.argsort(-total_scores)[:5]

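The completed total_scores line blends the best per-course similarity with two course-level signals; pulling the popularity and completion_rate columns out as arrays also avoids broadcasting a scalar against an entire DataFrame, which the truncated old line suggests was the bug. A worked toy example (assumes both columns are scaled to 0-1):

# Blended course score: 0.6 * best similarity + 0.2 * popularity + 0.2 * completion rate.
import numpy as np

similarities = np.array([[0.9, 0.2],   # skill A vs two courses
                         [0.4, 0.7]])  # skill B vs the same courses
popularity = np.array([0.5, 1.0])
completion_rate = np.array([0.8, 0.3])

total = 0.6 * np.max(similarities, axis=0) + 0.2 * popularity + 0.2 * completion_rate
print(total)                   # [0.8  0.68]
print(np.argsort(-total)[:5])  # course indices, best first
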
@@ -294,7 +291,7 @@ def assess_skills():
     if len(answers) != len(user_skills):
         return jsonify({"error": "Answers count must match skills count"}), 400

-    load_precomputed_resources()
+    load_precomputed_resources()  # Load precomputed resources before processing

     user_questions = []
     for skill in user_skills:

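With load_precomputed_resources() called up front, the endpoint can assume the global caches are populated before it builds questions. A hypothetical client call; the route, host, and payload keys are assumptions, only the one-answer-per-skill shape follows from the validation shown above:

# Illustrative client request against the assess endpoint.
import requests

resp = requests.post(
    "http://localhost:5000/assess_skills",  # assumed route
    json={
        "skills": ["python", "sql"],         # assumed key names
        "answers": ["generators yield lazily", "a JOIN combines tables"],
    },
)
print(resp.status_code, resp.json())
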