Commit
·
293022c
1
Parent(s):
571537c
Content based recommender completed
Browse files
.gitignore
CHANGED
@@ -1,8 +1,6 @@
|
|
1 |
-
ars/
|
2 |
-
|
3 |
.env
|
4 |
Artifacts/
|
5 |
logs/
|
6 |
-
__pycache__/
|
7 |
-
|
8 |
model_trainer/
|
|
|
1 |
+
ars/
|
|
|
2 |
.env
|
3 |
Artifacts/
|
4 |
logs/
|
5 |
+
__pycache__/
|
|
|
6 |
model_trainer/
|
anime_recommender/entity/artifact_entity.py
CHANGED
@@ -5,6 +5,14 @@ from typing import Optional
|
|
5 |
class DataIngestionArtifact:
|
6 |
feature_store_anime_file_path:str
|
7 |
feature_store_userrating_file_path:str
|
|
|
8 |
@dataclass
|
9 |
class DataTransformationArtifact:
|
10 |
-
merged_file_path:str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
class DataIngestionArtifact:
|
6 |
feature_store_anime_file_path:str
|
7 |
feature_store_userrating_file_path:str
|
8 |
+
|
9 |
@dataclass
class DataTransformationArtifact:
    """Output record of the data transformation stage."""

    # Presumably the location of the merged dataset written by the
    # transformation step -- confirm against the producing component.
    merged_file_path: str
|
12 |
+
|
13 |
+
|
14 |
+
|
15 |
+
|
16 |
+
@dataclass
class ContentBasedModelArtifact:
    """Output record of the content-based model training stage."""

    # File path under which the trainer saved the cosine similarity model.
    cosine_similarity_model_file_path: str
|
anime_recommender/entity/config_entity.py
CHANGED
@@ -40,3 +40,15 @@ class DataTransformationConfig:
|
|
40 |
"""
|
41 |
self.data_transformation_dir:str = os.path.join(training_pipeline_config.artifact_dir,DATA_TRANSFORMATION_DIR)
|
42 |
self.merged_file_path:str = os.path.join(self.data_transformation_dir,DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR,MERGED_FILE_NAME)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
"""
|
41 |
self.data_transformation_dir:str = os.path.join(training_pipeline_config.artifact_dir,DATA_TRANSFORMATION_DIR)
|
42 |
self.merged_file_path:str = os.path.join(self.data_transformation_dir,DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR,MERGED_FILE_NAME)
|
43 |
+
|
44 |
+
|
45 |
+
class ContentBasedModelConfig:
    """
    Filesystem locations used by the content-based model training step.
    """
    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        """
        Derive the trainer directory and the model file path.

        Both paths are rooted at ``training_pipeline_config.artifact_dir``.
        """
        artifact_root = training_pipeline_config.artifact_dir
        trainer_dir = os.path.join(artifact_root, MODEL_TRAINER_DIR_NAME)

        self.model_trainer_dir: str = trainer_dir
        # The model file sits in a dedicated sub-directory of the trainer dir.
        self.cosine_similarity_model_file_path: str = os.path.join(
            self.model_trainer_dir,
            MODEL_TRAINER_CON_TRAINED_MODEL_DIR,
            MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME,
        )
|
anime_recommender/source/content_based_recommender.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
from anime_recommender.loggers.logging import logging
|
3 |
+
from anime_recommender.exception.exception import AnimeRecommendorException
|
4 |
+
from anime_recommender.entity.config_entity import ContentBasedModelConfig
|
5 |
+
from anime_recommender.entity.artifact_entity import ContentBasedModelArtifact, DataIngestionArtifact
|
6 |
+
from anime_recommender.utils.main_utils.utils import load_csv_data
|
7 |
+
from anime_recommender.model_trainer.content_filtering import ContentBasedRecommender
|
8 |
+
from anime_recommender.constant import *
|
9 |
+
|
10 |
+
class ContentBasedModelTrainer:
    """Build the content-based recommender, save it, and report where it was saved."""

    def __init__(self, content_based_model_trainer_config: ContentBasedModelConfig, data_ingestion_artifact: DataIngestionArtifact):
        """
        Store the trainer configuration and the ingestion artifact.

        Raises:
            AnimeRecommendorException: if storing either argument fails.
        """
        try:
            self.content_based_model_trainer_config = content_based_model_trainer_config
            self.data_ingestion_artifact = data_ingestion_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    def initiate_model_trainer(self) -> ContentBasedModelArtifact:
        """
        Run the content-based training step end to end.

        Loads the anime feature-store CSV, builds a ``ContentBasedRecommender``
        from it, saves the model to the configured path, then logs a sample
        set of recommendations produced from the saved model.

        Returns:
            ContentBasedModelArtifact holding the saved model's file path.

        Raises:
            AnimeRecommendorException: wraps any error raised along the way.
        """
        try:
            logging.info("Loading ingested data...")
            anime_frame = load_csv_data(self.data_ingestion_artifact.feature_store_anime_file_path)
            logging.info("Training ContentBasedRecommender model...")

            # Constructing the recommender is the training step here.
            model = ContentBasedRecommender(df=anime_frame)

            # Persist the model at the location dictated by the config.
            model_path = self.content_based_model_trainer_config.cosine_similarity_model_file_path
            model.save_model(model_path)
            logging.info("Model saved successfully.")

            # Smoke check: query the saved model with a fixed title and log
            # the result; the recommendations themselves are not returned.
            logging.info("Loading saved model to get recommendations...")
            cosine_recommendations = model.get_rec_cosine(
                title="One Piece",
                model_path=model_path,
                n_recommendations=10,
            )
            logging.info(f"Cosine similarity recommendations: {cosine_recommendations}")

            # Hand the saved model location to downstream stages.
            return ContentBasedModelArtifact(cosine_similarity_model_file_path=model_path)
        except Exception as e:
            raise AnimeRecommendorException(f"Error in ContentBasedModelTrainer: {str(e)}", sys)
|
run_pipeline.py
CHANGED
@@ -2,11 +2,11 @@ import sys
|
|
2 |
from anime_recommender.loggers.logging import logging
|
3 |
from anime_recommender.exception.exception import AnimeRecommendorException
|
4 |
from anime_recommender.source.data_ingestion import DataIngestion
|
5 |
-
from anime_recommender.entity.config_entity import TrainingPipelineConfig,DataIngestionConfig,DataTransformationConfig
|
6 |
-
# ,DataTransformationConfig,CollaborativeModelConfig
|
7 |
from anime_recommender.source.data_transformation import DataTransformation
|
8 |
# from anime_recommender.source.collaborative_recommenders import CollaborativeModelTrainer
|
9 |
-
|
10 |
# from anime_recommender.source.popularity_based_recommenders import PopularityBasedRecommendor
|
11 |
|
12 |
if __name__ == "__main__":
|
@@ -35,13 +35,13 @@ if __name__ == "__main__":
|
|
35 |
# logging.info("Collaborative Model training completed.")
|
36 |
# print(collaborative_model_trainer_artifact)
|
37 |
|
38 |
-
#
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
|
46 |
# # Popularity Based Filtering
|
47 |
# logging.info("Initiating Popularity based filtering.")
|
|
|
2 |
from anime_recommender.loggers.logging import logging
|
3 |
from anime_recommender.exception.exception import AnimeRecommendorException
|
4 |
from anime_recommender.source.data_ingestion import DataIngestion
|
5 |
+
from anime_recommender.entity.config_entity import TrainingPipelineConfig,DataIngestionConfig,DataTransformationConfig,ContentBasedModelConfig
|
6 |
+
# ,DataTransformationConfig,CollaborativeModelConfig
|
7 |
from anime_recommender.source.data_transformation import DataTransformation
|
8 |
# from anime_recommender.source.collaborative_recommenders import CollaborativeModelTrainer
|
9 |
+
from anime_recommender.source.content_based_recommender import ContentBasedModelTrainer
|
10 |
# from anime_recommender.source.popularity_based_recommenders import PopularityBasedRecommendor
|
11 |
|
12 |
if __name__ == "__main__":
|
|
|
35 |
# logging.info("Collaborative Model training completed.")
|
36 |
# print(collaborative_model_trainer_artifact)
|
37 |
|
38 |
+
# Content Based Model Training
|
39 |
+
content_based_model_trainer_config = ContentBasedModelConfig(training_pipeline_config)
|
40 |
+
content_based_model_trainer = ContentBasedModelTrainer(content_based_model_trainer_config=content_based_model_trainer_config,data_ingestion_artifact=data_ingestion_artifact)
|
41 |
+
logging.info("Initiating Content Based Model training.")
|
42 |
+
content_based_model_trainer_artifact = content_based_model_trainer.initiate_model_trainer()
|
43 |
+
logging.info("Content Based Model training completed.")
|
44 |
+
print(content_based_model_trainer_artifact)
|
45 |
|
46 |
# # Popularity Based Filtering
|
47 |
# logging.info("Initiating Popularity based filtering.")
|