# Web file-viewer residue (not part of the module): file size 3,544 bytes;
# commits b4f6ffc, c3c7748, 293022c; line-number gutter 1-63.
import sys
from anime_recommender.loggers.logging import logging
from anime_recommender.exception.exception import AnimeRecommendorException
from anime_recommender.entity.config_entity import ContentBasedModelConfig
from anime_recommender.entity.artifact_entity import ContentBasedModelArtifact, DataIngestionArtifact
from anime_recommender.utils.main_utils.utils import load_csv_data, upload_model_to_huggingface
from anime_recommender.model_trainer.content_based_modelling import ContentBasedRecommender
from anime_recommender.constant import *
class ContentBasedModelTrainer:
    """Build, persist, publish and smoke-test the content-based recommender.

    The modelling itself is delegated to ``ContentBasedRecommender``; this
    class only wires the ingested dataset and the configured output path
    together and reports the result as a ``ContentBasedModelArtifact``.
    """

    def __init__(
        self,
        content_based_model_trainer_config: ContentBasedModelConfig,
        data_ingestion_artifact: DataIngestionArtifact,
    ):
        """Store the training configuration and the upstream ingestion artifact.

        Args:
            content_based_model_trainer_config: Training configuration. Its
                ``cosine_similarity_model_file_path`` is the location the
                model is written to.
            data_ingestion_artifact: Output of the ingestion step. Its
                ``feature_store_anime_file_path`` is the CSV that gets loaded.
        """
        # Plain attribute assignment cannot raise, so no try/except wrapper
        # is needed around these two lines.
        self.content_based_model_trainer_config = content_based_model_trainer_config
        self.data_ingestion_artifact = data_ingestion_artifact

    def initiate_model_trainer(
        self,
        *,
        sample_title: str = "One Piece",
        n_recommendations: int = 10,
    ) -> ContentBasedModelArtifact:
        """Run the content-based training step end to end.

        Steps, in order: load the ingested CSV, construct a
        ``ContentBasedRecommender`` from it, save the model to the configured
        path, upload that file to the Hugging Face repo named by
        ``MODELS_FILEPATH``, then query the saved model once as a smoke test
        and log the result.

        NOTE(review): the original author describes the model as TF-IDF plus
        cosine similarity and says construction also trains it; both live in
        ``ContentBasedRecommender`` and are not visible from here.

        Args:
            sample_title: Title used for the post-save smoke-test query.
                Defaults to the previously hard-coded ``"One Piece"``.
            n_recommendations: Number of recommendations requested by the
                smoke-test query. Defaults to the previously hard-coded 10.

        Returns:
            ContentBasedModelArtifact: Carries the path of the saved model.

        Raises:
            AnimeRecommendorException: If any step fails; the triggering
                exception is attached as ``__cause__``.
        """
        try:
            # Read the output path once; every later step refers to it.
            model_path = self.content_based_model_trainer_config.cosine_similarity_model_file_path

            logging.info("Loading ingested data...")
            df = load_csv_data(self.data_ingestion_artifact.feature_store_anime_file_path)

            logging.info("Training ContentBasedRecommender model...")
            recommender = ContentBasedRecommender(df=df)

            recommender.save_model(model_path=model_path)
            # Logged before the upload so a failed upload still leaves a
            # record that the local save succeeded.
            logging.info("Model saved successfully.")

            upload_model_to_huggingface(
                model_path=model_path,
                repo_id=MODELS_FILEPATH,
                filename=MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME,
            )
            logging.info("Model uploaded to Hugging Face.")

            # Smoke test: query the model through the file that was just
            # written, so the persisted file (not only the in-memory object)
            # is exercised.
            logging.info("Loading saved model to get recommendations...")
            cosine_recommendations = recommender.get_rec_cosine(
                title=sample_title,
                model_path=model_path,
                n_recommendations=n_recommendations,
            )
            logging.info(f"Cosine similarity recommendations: {cosine_recommendations}")

            return ContentBasedModelArtifact(cosine_similarity_model_file_path=model_path)
        except Exception as e:
            # Chain explicitly so the root cause stays on the traceback.
            raise AnimeRecommendorException(f"Error in ContentBasedModelTrainer: {e}", sys) from e