Commit
·
b4f6ffc
1
Parent(s):
c2f0782
Updated all files
Browse files- .gitignore +4 -4
- Dockerfile +16 -16
- anime_recommender/{source → components}/__init__.py +0 -0
- anime_recommender/{source → components}/collaborative_recommender.py +90 -72
- anime_recommender/{source → components}/content_based_recommender.py +57 -42
- anime_recommender/{source → components}/data_ingestion.py +82 -58
- anime_recommender/{source → components}/data_transformation.py +108 -114
- anime_recommender/{source → components}/top_anime_recommenders.py +74 -52
- anime_recommender/constant/__init__.py +39 -39
- anime_recommender/entity/artifact_entity.py +20 -20
- anime_recommender/entity/config_entity.py +65 -65
- anime_recommender/exception/exception.py +43 -43
- anime_recommender/loggers/logging.py +15 -15
- anime_recommender/model_trainer/collaborative_modelling.py +263 -183
- anime_recommender/model_trainer/content_based_modelling.py +72 -70
- anime_recommender/model_trainer/top_anime_filtering.py +103 -92
- anime_recommender/pipelines/training_pipeline.py +151 -151
- anime_recommender/utils/main_utils/utils.py +89 -47
- app.py +233 -233
- notebooks/EDA.ipynb +0 -0
- notebooks/final_ARS.ipynb +0 -0
- requirements.txt +5 -0
- run_pipeline.py +52 -52
- setup.py +28 -28
.gitignore
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
|
2 |
-
.env
|
3 |
-
Artifacts/
|
4 |
-
logs/
|
5 |
__pycache__/
|
|
|
1 |
+
anime/
|
2 |
+
.env
|
3 |
+
Artifacts/
|
4 |
+
logs/
|
5 |
__pycache__/
|
Dockerfile
CHANGED
@@ -1,17 +1,17 @@
|
|
1 |
-
# Use the official Python image as a base
|
2 |
-
FROM python:3.10-slim-buster
|
3 |
-
|
4 |
-
# Set the working directory in the container
|
5 |
-
WORKDIR /app
|
6 |
-
|
7 |
-
# Copy the app files into the container
|
8 |
-
COPY . .
|
9 |
-
|
10 |
-
# Install required packages
|
11 |
-
RUN pip install -r requirements.txt
|
12 |
-
|
13 |
-
# Expose the port that Streamlit uses
|
14 |
-
EXPOSE 8501
|
15 |
-
|
16 |
-
# Run the Streamlit app
|
17 |
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
|
|
1 |
+
# Use the official Python image as a base
|
2 |
+
FROM python:3.10-slim-buster
|
3 |
+
|
4 |
+
# Set the working directory in the container
|
5 |
+
WORKDIR /app
|
6 |
+
|
7 |
+
# Copy the app files into the container
|
8 |
+
COPY . .
|
9 |
+
|
10 |
+
# Install required packages
|
11 |
+
RUN pip install -r requirements.txt
|
12 |
+
|
13 |
+
# Expose the port that Streamlit uses
|
14 |
+
EXPOSE 8501
|
15 |
+
|
16 |
+
# Run the Streamlit app
|
17 |
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
anime_recommender/{source → components}/__init__.py
RENAMED
File without changes
|
anime_recommender/{source → components}/collaborative_recommender.py
RENAMED
@@ -1,73 +1,91 @@
|
|
1 |
-
import sys
|
2 |
-
from anime_recommender.loggers.logging import logging
|
3 |
-
from anime_recommender.exception.exception import AnimeRecommendorException
|
4 |
-
from anime_recommender.entity.config_entity import CollaborativeModelConfig
|
5 |
-
from anime_recommender.entity.artifact_entity import DataTransformationArtifact, CollaborativeModelArtifact
|
6 |
-
from anime_recommender.utils.main_utils.utils import load_csv_data, save_model, load_object
|
7 |
-
from anime_recommender.model_trainer.collaborative_modelling import CollaborativeAnimeRecommender
|
8 |
-
|
9 |
-
class CollaborativeModelTrainer:
|
10 |
-
"""
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
)
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
)
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
logging.info(
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
raise AnimeRecommendorException(f"Error in CollaborativeModelTrainer: {str(e)}", sys)
|
|
|
1 |
+
import sys
|
2 |
+
from anime_recommender.loggers.logging import logging
|
3 |
+
from anime_recommender.exception.exception import AnimeRecommendorException
|
4 |
+
from anime_recommender.entity.config_entity import CollaborativeModelConfig
|
5 |
+
from anime_recommender.entity.artifact_entity import DataTransformationArtifact, CollaborativeModelArtifact
|
6 |
+
from anime_recommender.utils.main_utils.utils import load_csv_data, save_model, load_object
|
7 |
+
from anime_recommender.model_trainer.collaborative_modelling import CollaborativeAnimeRecommender
|
8 |
+
|
9 |
+
class CollaborativeModelTrainer:
    """
    Fits one collaborative-filtering recommender and persists it to disk.

    Supported model types:
    - 'svd': Singular Value Decomposition (SVD)
    - 'item_knn': item-based K-Nearest Neighbors (KNN)
    - 'user_knn': user-based K-Nearest Neighbors (KNN)
    """

    def __init__(self, collaborative_model_trainer_config: CollaborativeModelConfig, data_transformation_artifact: DataTransformationArtifact):
        """
        Store the trainer configuration and the transformed-data artifact.

        Args:
            collaborative_model_trainer_config (CollaborativeModelConfig): Supplies the file paths the trained models are saved to.
            data_transformation_artifact (DataTransformationArtifact): Supplies `merged_file_path`, the CSV the models are trained on.
        """
        try:
            self.data_transformation_artifact = data_transformation_artifact
            self.collaborative_model_trainer_config = collaborative_model_trainer_config
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    def initiate_model_trainer(self, model_type: str) -> CollaborativeModelArtifact:
        """
        Train the requested model, save it, reload it and log sample recommendations.

        Args:
            model_type (str): One of 'svd', 'item_knn' or 'user_knn'.

        Returns:
            CollaborativeModelArtifact: Artifact with only the path of the model that was trained filled in.

        Raises:
            AnimeRecommendorException: Wraps any failure, including an unsupported `model_type`.
        """
        try:
            logging.info("Loading transformed data...")
            ratings = load_csv_data(self.data_transformation_artifact.merged_file_path)
            recommender = CollaborativeAnimeRecommender(ratings)
            config = self.collaborative_model_trainer_config

            if model_type == 'svd':
                model_path = config.svd_trained_model_file_path
                logging.info("Training and saving SVD model...")
                recommender.train_svd()
                save_model(recommender.svd, model_path)

                # Reload from disk so the sample run exercises the persisted model.
                logging.info("Loading pre-trained SVD model...")
                svd_model = load_object(model_path)
                svd_recommendations = recommender.get_svd_recommendations(user_id=436, n=10, svd_model=svd_model)
                logging.info(f"SVD recommendations: {svd_recommendations}")
                return CollaborativeModelArtifact(svd_file_path=model_path)

            if model_type == 'item_knn':
                model_path = config.item_knn_trained_model_file_path
                logging.info("Training and saving KNN item-based model...")
                recommender.train_knn_item_based()
                save_model(recommender.knn_item_based, model_path)

                logging.info("Loading pre-trained item-based KNN model...")
                item_knn_model = load_object(model_path)
                item_based_recommendations = recommender.get_item_based_recommendations(
                    anime_name='One Piece', n_recommendations=10, knn_item_model=item_knn_model
                )
                logging.info(f"Item Based recommendations: {item_based_recommendations}")
                return CollaborativeModelArtifact(item_based_knn_file_path=model_path)

            if model_type == 'user_knn':
                model_path = config.user_knn_trained_model_file_path
                logging.info("Training and saving KNN user-based model...")
                recommender.train_knn_user_based()
                save_model(recommender.knn_user_based, model_path)

                logging.info("Loading pre-trained user-based KNN model...")
                user_knn_model = load_object(model_path)
                user_based_recommendations = recommender.get_user_based_recommendations(
                    user_id=817, n_recommendations=10, knn_user_model=user_knn_model
                )
                logging.info(f"User Based recommendations: {user_based_recommendations}")
                return CollaborativeModelArtifact(user_based_knn_file_path=model_path)

            # None of the supported model types matched.
            raise ValueError("Invalid model_type. Choose from 'svd', 'item_knn', or 'user_knn'.")
        except Exception as e:
            raise AnimeRecommendorException(f"Error in CollaborativeModelTrainer: {str(e)}", sys)
|
anime_recommender/{source → components}/content_based_recommender.py
RENAMED
@@ -1,43 +1,58 @@
|
|
1 |
-
import sys
|
2 |
-
from anime_recommender.loggers.logging import logging
|
3 |
-
from anime_recommender.exception.exception import AnimeRecommendorException
|
4 |
-
from anime_recommender.entity.config_entity import ContentBasedModelConfig
|
5 |
-
from anime_recommender.entity.artifact_entity import ContentBasedModelArtifact, DataIngestionArtifact
|
6 |
-
from anime_recommender.utils.main_utils.utils import load_csv_data
|
7 |
-
from anime_recommender.model_trainer.content_based_modelling import ContentBasedRecommender
|
8 |
-
from anime_recommender.constant import *
|
9 |
-
|
10 |
-
class ContentBasedModelTrainer:
|
11 |
-
"""
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
raise AnimeRecommendorException(f"Error in ContentBasedModelTrainer: {str(e)}", sys)
|
|
|
1 |
+
import sys
|
2 |
+
from anime_recommender.loggers.logging import logging
|
3 |
+
from anime_recommender.exception.exception import AnimeRecommendorException
|
4 |
+
from anime_recommender.entity.config_entity import ContentBasedModelConfig
|
5 |
+
from anime_recommender.entity.artifact_entity import ContentBasedModelArtifact, DataIngestionArtifact
|
6 |
+
from anime_recommender.utils.main_utils.utils import load_csv_data
|
7 |
+
from anime_recommender.model_trainer.content_based_modelling import ContentBasedRecommender
|
8 |
+
from anime_recommender.constant import *
|
9 |
+
|
10 |
+
class ContentBasedModelTrainer:
    """
    Builds the content-based recommender and stores it on disk.
    """

    def __init__(self, content_based_model_trainer_config: ContentBasedModelConfig, data_ingestion_artifact: DataIngestionArtifact):
        """
        Store the trainer configuration and the data-ingestion artifact.

        Args:
            content_based_model_trainer_config (ContentBasedModelConfig): Supplies the path the model is saved to.
            data_ingestion_artifact (DataIngestionArtifact): Supplies the path of the ingested anime dataset.
        """
        try:
            self.data_ingestion_artifact = data_ingestion_artifact
            self.content_based_model_trainer_config = content_based_model_trainer_config
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    def initiate_model_trainer(self) -> ContentBasedModelArtifact:
        """
        Train the content-based model, save it, and log sample recommendations
        obtained from the saved model.

        Returns:
            ContentBasedModelArtifact: Artifact holding the path of the saved model.

        Raises:
            AnimeRecommendorException: Wraps any failure raised during training, saving or lookup.
        """
        try:
            model_path = self.content_based_model_trainer_config.cosine_similarity_model_file_path

            logging.info("Loading ingested data...")
            anime_df = load_csv_data(self.data_ingestion_artifact.feature_store_anime_file_path)

            logging.info("Training ContentBasedRecommender model...")
            recommender = ContentBasedRecommender(df=anime_df)

            # Persist the fitted model before querying it.
            recommender.save_model(model_path)
            logging.info("Model saved successfully.")

            logging.info("Loading saved model to get recommendations...")
            cosine_recommendations = recommender.get_rec_cosine(
                title="One Piece",
                model_path=model_path,
                n_recommendations=10,
            )
            logging.info(f"Cosine similarity recommendations: {cosine_recommendations}")

            return ContentBasedModelArtifact(cosine_similarity_model_file_path=model_path)
        except Exception as e:
            raise AnimeRecommendorException(f"Error in ContentBasedModelTrainer: {str(e)}", sys)
|
anime_recommender/{source → components}/data_ingestion.py
RENAMED
@@ -1,58 +1,82 @@
|
|
1 |
-
import
|
2 |
-
import
|
3 |
-
|
4 |
-
from
|
5 |
-
from anime_recommender.
|
6 |
-
from anime_recommender.
|
7 |
-
from anime_recommender.entity.
|
8 |
-
from anime_recommender.
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
# Load
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import pandas as pd
|
3 |
+
from datasets import load_dataset
|
4 |
+
from anime_recommender.loggers.logging import logging
|
5 |
+
from anime_recommender.exception.exception import AnimeRecommendorException
|
6 |
+
from anime_recommender.entity.config_entity import DataIngestionConfig
|
7 |
+
from anime_recommender.entity.artifact_entity import DataIngestionArtifact
|
8 |
+
from anime_recommender.utils.main_utils.utils import export_data_to_dataframe
|
9 |
+
|
10 |
+
class DataIngestion:
    """
    Handles data ingestion for the anime recommender system.

    Fetches datasets from Hugging Face, converts them into pandas DataFrames,
    and exports them to the feature store for the later pipeline stages.
    """

    def __init__(self, data_ingestion_config: DataIngestionConfig):
        """
        Initializes the DataIngestion class with the provided configuration.

        Args:
            data_ingestion_config (DataIngestionConfig): Configuration settings for data ingestion.
        """
        try:
            self.data_ingestion_config = data_ingestion_config
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    def fetch_data_from_huggingface(self, dataset_path: str, split: str = None) -> pd.DataFrame:
        """
        Fetches a dataset from Hugging Face and converts it into a pandas DataFrame.

        Args:
            dataset_path (str): The path to the Hugging Face dataset.
            split (str, optional): The dataset split to fetch (e.g., 'train', 'test').
                Defaults to None, in which case the 'train' split is used.

        Returns:
            pd.DataFrame: The dataset converted into a pandas DataFrame.

        Raises:
            AnimeRecommendorException: If loading or converting the dataset fails.
        """
        try:
            logging.info(f"Fetching data from Hugging Face dataset: {dataset_path}")
            # Load dataset from Hugging Face
            dataset = load_dataset(dataset_path, split=split)

            # Per the `datasets` documentation, load_dataset returns a mapping of
            # split name -> dataset when no split is requested, and the requested
            # split itself otherwise. Indexing the latter with 'train' would be a
            # column lookup, so only index when no split was requested.
            records = dataset['train'] if split is None else dataset

            # Convert dataset to pandas DataFrame
            df = pd.DataFrame(records)

            # Log some information about the data
            logging.info(f"Shape of the dataframe: {df.shape}")
            logging.info(f"Column names: {df.columns}")
            logging.info(f"Preview of the DataFrame:\n{df.head()}")
            logging.info("Data fetched successfully from Hugging Face.")

            return df

        except Exception as e:
            logging.error(f"An error occurred while fetching data: {str(e)}")
            raise AnimeRecommendorException(e, sys)

    def ingest_data(self) -> DataIngestionArtifact:
        """
        Orchestrates the data ingestion process, fetching datasets and saving them to the feature store.

        Returns:
            DataIngestionArtifact: An artifact containing paths to the ingested datasets.

        Raises:
            AnimeRecommendorException: If fetching or exporting either dataset fails.
        """
        try:
            config = self.data_ingestion_config

            # Load anime and rating data from Hugging Face datasets
            anime_df = self.fetch_data_from_huggingface(config.anime_filepath)
            rating_df = self.fetch_data_from_huggingface(config.rating_filepath)

            # Write both frames to their feature-store locations
            export_data_to_dataframe(anime_df, file_path=config.feature_store_anime_file_path)
            export_data_to_dataframe(rating_df, file_path=config.feature_store_userrating_file_path)

            # Create artifact to store data ingestion info
            return DataIngestionArtifact(
                feature_store_anime_file_path=config.feature_store_anime_file_path,
                feature_store_userrating_file_path=config.feature_store_userrating_file_path,
            )

        except Exception as e:
            raise AnimeRecommendorException(e, sys)
|
anime_recommender/{source → components}/data_transformation.py
RENAMED
@@ -1,115 +1,109 @@
|
|
1 |
-
import sys
|
2 |
-
import numpy as np
|
3 |
-
import pandas as pd
|
4 |
-
|
5 |
-
from anime_recommender.
|
6 |
-
from anime_recommender.
|
7 |
-
from anime_recommender.
|
8 |
-
from anime_recommender.
|
9 |
-
from anime_recommender.entity.
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
merged_df
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
""
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
data_transformation_artifact = DataTransformationArtifact(
|
110 |
-
merged_file_path=self.data_transformation_config.merged_file_path
|
111 |
-
)
|
112 |
-
|
113 |
-
return data_transformation_artifact
|
114 |
-
except Exception as e:
|
115 |
raise AnimeRecommendorException(e,sys)
|
|
|
1 |
+
import sys
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
from anime_recommender.loggers.logging import logging
|
5 |
+
from anime_recommender.exception.exception import AnimeRecommendorException
|
6 |
+
from anime_recommender.utils.main_utils.utils import export_data_to_dataframe
|
7 |
+
from anime_recommender.constant import *
|
8 |
+
from anime_recommender.entity.config_entity import DataTransformationConfig
|
9 |
+
from anime_recommender.entity.artifact_entity import DataIngestionArtifact,DataTransformationArtifact
|
10 |
+
|
11 |
+
class DataTransformation:
    """
    Handles data transformation for the anime recommender: merges the ingested
    anime and user-rating datasets, cleans and filters the result, and exports it.
    """

    def __init__(self, data_ingestion_artifact: DataIngestionArtifact, data_transformation_config: DataTransformationConfig):
        """
        Initializes the DataTransformation class with the given data ingestion and configuration artifacts.

        Args:
            data_ingestion_artifact (DataIngestionArtifact): The artifact containing ingested data paths.
            data_transformation_config (DataTransformationConfig): Configuration object for data transformation.
        """
        try:
            self.data_ingestion_artifact = data_ingestion_artifact
            self.data_transformation_config = data_transformation_config
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    @staticmethod
    def read_data(file_path) -> pd.DataFrame:
        """
        Reads data from a CSV file.

        Args:
            file_path (str): Path to the CSV file.

        Returns:
            pd.DataFrame: The DataFrame containing the data from the CSV file.

        Raises:
            AnimeRecommendorException: If the file cannot be read.
        """
        try:
            return pd.read_csv(file_path)
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    @staticmethod
    def merge_data(anime_df: pd.DataFrame, rating_df: pd.DataFrame) -> pd.DataFrame:
        """
        Merges the anime and rating DataFrames on 'anime_id' (inner join).

        Args:
            anime_df (pd.DataFrame): DataFrame containing anime information.
            rating_df (pd.DataFrame): DataFrame containing user rating information.

        Returns:
            pd.DataFrame: Merged DataFrame on 'anime_id'.

        Raises:
            AnimeRecommendorException: If the merge fails.
        """
        try:
            merged_df = pd.merge(rating_df, anime_df, on="anime_id", how="inner")
            logging.info(f"Shape of the Merged dataframe:{merged_df.shape}")
            logging.info(f"Column names: {merged_df.columns}")
            return merged_df
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    @staticmethod
    def clean_filter_data(merged_df: pd.DataFrame) -> pd.DataFrame:
        """
        Cleans the merged DataFrame and filters it to well-rated anime.

        'average_rating' is converted to numeric (non-numeric markers such as
        'UNKNOWN' become NaN), missing values are filled with the column median,
        rows with an average rating of 6 or below are removed, and columns not
        needed downstream are dropped.

        Note: the 'average_rating' column of the passed DataFrame is overwritten
        in place with the numeric, median-filled values.

        Args:
            merged_df (pd.DataFrame): Merged DataFrame to clean and filter.

        Returns:
            pd.DataFrame: Cleaned and filtered DataFrame.

        Raises:
            AnimeRecommendorException: If a required column is missing or cleaning fails.
        """
        try:
            # errors='coerce' turns 'UNKNOWN' (and any other non-numeric value)
            # into NaN, so no separate replace step is required.
            ratings = pd.to_numeric(merged_df['average_rating'], errors='coerce')

            # fillna returns a new Series; the result must be assigned back,
            # otherwise the imputation is silently lost.
            merged_df['average_rating'] = ratings.fillna(ratings.median())

            cols_to_drop = ['username', 'overview', 'type', 'episodes', 'producers',
                            'licensors', 'studios', 'source', 'rank', 'popularity',
                            'favorites', 'scored by', 'members']

            # Boolean filtering and drop() both return new objects, so the
            # caller's frame is not affected beyond the column overwrite above.
            cleaned_df = merged_df[merged_df['average_rating'] > 6].drop(columns=cols_to_drop)

            logging.info(f"Shape of the Merged dataframe:{cleaned_df.shape}")
            logging.info(f"Column names: {cleaned_df.columns}")
            logging.info(f"Preview of the merged DataFrame:\n{cleaned_df.head()}")
            return cleaned_df
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    def initiate_data_transformation(self) -> DataTransformationArtifact:
        """
        Initiates the data transformation process by reading, transforming, and saving the data.

        Returns:
            DataTransformationArtifact: The artifact containing the path to the transformed data.

        Raises:
            AnimeRecommendorException: If any step of the transformation fails.
        """
        logging.info("Entering initiate_data_transformation method of DataTransformation class.")
        try:
            anime_df = DataTransformation.read_data(self.data_ingestion_artifact.feature_store_anime_file_path)
            rating_df = DataTransformation.read_data(self.data_ingestion_artifact.feature_store_userrating_file_path)
            merged_df = DataTransformation.merge_data(anime_df, rating_df)
            transformed_df = DataTransformation.clean_filter_data(merged_df)

            export_data_to_dataframe(transformed_df, self.data_transformation_config.merged_file_path)
            data_transformation_artifact = DataTransformationArtifact(
                merged_file_path=self.data_transformation_config.merged_file_path
            )

            return data_transformation_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys)
|
anime_recommender/{source → components}/top_anime_recommenders.py
RENAMED
@@ -1,53 +1,75 @@
|
|
1 |
-
import sys
|
2 |
-
from anime_recommender.exception.exception import AnimeRecommendorException
|
3 |
-
from anime_recommender.loggers.logging import logging
|
4 |
-
from anime_recommender.utils.main_utils.utils import load_csv_data
|
5 |
-
from anime_recommender.entity.artifact_entity import DataIngestionArtifact
|
6 |
-
from anime_recommender.model_trainer.top_anime_filtering import PopularityBasedFiltering
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
raise AnimeRecommendorException(e,sys)
|
|
|
1 |
+
import sys
|
2 |
+
from anime_recommender.exception.exception import AnimeRecommendorException
|
3 |
+
from anime_recommender.loggers.logging import logging
|
4 |
+
from anime_recommender.utils.main_utils.utils import load_csv_data
|
5 |
+
from anime_recommender.entity.artifact_entity import DataIngestionArtifact
|
6 |
+
from anime_recommender.model_trainer.top_anime_filtering import PopularityBasedFiltering
|
7 |
+
|
8 |
+
class PopularityBasedRecommendor:
    """
    A class that provides anime recommendations based on different popularity criteria.
    """

    # Filter types accepted by initiate_model_trainer. Each name is also the
    # name of the PopularityBasedFiltering method that produces the result.
    _FILTER_TYPES = (
        'popular_animes',
        'top_ranked_animes',
        'overall_top_rated_animes',
        'favorite_animes',
        'top_animes_members',
        'popular_anime_among_members',
        'top_avg_rated',
    )

    def __init__(self, data_ingestion_artifact: DataIngestionArtifact):
        """
        Initializes the PopularityBasedRecommendor with the ingested anime dataset.

        Args:
            data_ingestion_artifact (DataIngestionArtifact): An artifact containing the feature store file paths.
        """
        # The parameter is annotated rather than defaulted: a default of the
        # DataIngestionArtifact class itself carries no file paths and could
        # only fail later when the path attribute is read.
        try:
            self.data_ingestion_artifact = data_ingestion_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    def initiate_model_trainer(self, filter_type: str):
        """
        Builds the popularity-based recommender and logs the top 10 anime
        for the specified filter type.

        Args:
            filter_type (str): The type of filtering to apply.
                Options include:
                - 'popular_animes': Most popular anime based on user engagement.
                - 'top_ranked_animes': Highest ranked anime.
                - 'overall_top_rated_animes': Overall top-rated anime.
                - 'favorite_animes': Most favorited anime.
                - 'top_animes_members': Anime with the highest number of members.
                - 'popular_anime_among_members': Most popular anime among members.
                - 'top_avg_rated': Anime with the highest average ratings.
                Any other value is reported with a warning and nothing is computed.

        Raises:
            AnimeRecommendorException: If loading the data or computing the recommendations fails.
        """
        try:
            logging.info("Loading transformed data...")
            df = load_csv_data(self.data_ingestion_artifact.feature_store_anime_file_path)

            recommender = PopularityBasedFiltering(df)

            if filter_type not in self._FILTER_TYPES:
                # Previously an unknown value was ignored without any trace.
                logging.warning(
                    f"Unknown filter_type '{filter_type}'. Choose from: {', '.join(self._FILTER_TYPES)}"
                )
                return

            recommendations = getattr(recommender, filter_type)(n=10)

            # 'popular_animes' keeps its human-readable label; every other
            # filter is logged under its own name.
            label = "Popular Anime" if filter_type == 'popular_animes' else filter_type
            logging.info(f"{label} recommendations: {recommendations}")

        except Exception as e:
            raise AnimeRecommendorException(e, sys)
|
anime_recommender/constant/__init__.py
CHANGED
@@ -1,40 +1,40 @@
|
|
1 |
-
"""
|
2 |
-
Defining common constant variables for training pipeline
|
3 |
-
"""
|
4 |
-
PIPELINE_NAME: str = "AnimeRecommender"
|
5 |
-
ARTIFACT_DIR: str = "Artifacts"
|
6 |
-
ANIME_FILE_NAME: str = "Animes.csv"
|
7 |
-
RATING_FILE_NAME:str = "UserRatings.csv"
|
8 |
-
MERGED_FILE_NAME:str = "Anime_UserRatings.csv"
|
9 |
-
|
10 |
-
ANIME_FILE_PATH:str = "krishnaveni76/Animes"
|
11 |
-
RATING_FILE_PATH:str = "krishnaveni76/UserRatings"
|
12 |
-
ANIMEUSERRATINGS_FILE_PATH:str = "krishnaveni76/Anime_UserRatings"
|
13 |
-
MODELS_FILEPATH = "krishnaveni76/anime-recommendation-models"
|
14 |
-
|
15 |
-
"""
|
16 |
-
Data Ingestion related constant start with DATA_INGESTION VAR NAME
|
17 |
-
"""
|
18 |
-
DATA_INGESTION_DIR_NAME: str = "data_ingestion"
|
19 |
-
DATA_INGESTION_FEATURE_STORE_DIR: str = "feature_store"
|
20 |
-
DATA_INGESTION_INGESTED_DIR: str = "ingested"
|
21 |
-
|
22 |
-
"""
|
23 |
-
Data Transformation related constant start with DATA_VALIDATION VAR NAME
|
24 |
-
"""
|
25 |
-
DATA_TRANSFORMATION_DIR:str = "data_transformation"
|
26 |
-
DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR:str = "transformed"
|
27 |
-
|
28 |
-
"""
|
29 |
-
Model Trainer related constant start with MODEL TRAINER VAR NAME
|
30 |
-
"""
|
31 |
-
MODEL_TRAINER_DIR_NAME: str = "trained_models"
|
32 |
-
|
33 |
-
MODEL_TRAINER_COL_TRAINED_MODEL_DIR: str = "collaborative_recommenders"
|
34 |
-
MODEL_TRAINER_SVD_TRAINED_MODEL_NAME: str = "svd.pkl"
|
35 |
-
MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME: str = "itembasedknn.pkl"
|
36 |
-
MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME: str = "userbasedknn.pkl"
|
37 |
-
|
38 |
-
MODEL_TRAINER_CON_TRAINED_MODEL_DIR:str = "content_based_recommenders"
|
39 |
-
MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME:str = "cosine_similarity.pkl"
|
40 |
|
|
|
1 |
+
"""
|
2 |
+
Defining common constant variables for training pipeline
|
3 |
+
"""
|
4 |
+
PIPELINE_NAME: str = "AnimeRecommender"
|
5 |
+
ARTIFACT_DIR: str = "Artifacts"
|
6 |
+
ANIME_FILE_NAME: str = "Animes.csv"
|
7 |
+
RATING_FILE_NAME:str = "UserRatings.csv"
|
8 |
+
MERGED_FILE_NAME:str = "Anime_UserRatings.csv"
|
9 |
+
|
10 |
+
ANIME_FILE_PATH:str = "krishnaveni76/Animes"
|
11 |
+
RATING_FILE_PATH:str = "krishnaveni76/UserRatings"
|
12 |
+
ANIMEUSERRATINGS_FILE_PATH:str = "krishnaveni76/Anime_UserRatings"
|
13 |
+
MODELS_FILEPATH = "krishnaveni76/anime-recommendation-models"
|
14 |
+
|
15 |
+
"""
|
16 |
+
Data Ingestion related constant start with DATA_INGESTION VAR NAME
|
17 |
+
"""
|
18 |
+
DATA_INGESTION_DIR_NAME: str = "data_ingestion"
|
19 |
+
DATA_INGESTION_FEATURE_STORE_DIR: str = "feature_store"
|
20 |
+
DATA_INGESTION_INGESTED_DIR: str = "ingested"
|
21 |
+
|
22 |
+
"""
|
23 |
+
Data Transformation related constants start with DATA_TRANSFORMATION VAR NAME
|
24 |
+
"""
|
25 |
+
DATA_TRANSFORMATION_DIR:str = "data_transformation"
|
26 |
+
DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR:str = "transformed"
|
27 |
+
|
28 |
+
"""
|
29 |
+
Model Trainer related constant start with MODEL TRAINER VAR NAME
|
30 |
+
"""
|
31 |
+
MODEL_TRAINER_DIR_NAME: str = "trained_models"
|
32 |
+
|
33 |
+
MODEL_TRAINER_COL_TRAINED_MODEL_DIR: str = "collaborative_recommenders"
|
34 |
+
MODEL_TRAINER_SVD_TRAINED_MODEL_NAME: str = "svd.pkl"
|
35 |
+
MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME: str = "itembasedknn.pkl"
|
36 |
+
MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME: str = "userbasedknn.pkl"
|
37 |
+
|
38 |
+
MODEL_TRAINER_CON_TRAINED_MODEL_DIR:str = "content_based_recommenders"
|
39 |
+
MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME:str = "cosine_similarity.pkl"
|
40 |
|
anime_recommender/entity/artifact_entity.py
CHANGED
@@ -1,21 +1,21 @@
|
|
1 |
-
from dataclasses import dataclass
|
2 |
-
from typing import Optional
|
3 |
-
|
4 |
-
@dataclass
|
5 |
-
class DataIngestionArtifact:
|
6 |
-
feature_store_anime_file_path:str
|
7 |
-
feature_store_userrating_file_path:str
|
8 |
-
|
9 |
-
@dataclass
|
10 |
-
class DataTransformationArtifact:
|
11 |
-
merged_file_path:str
|
12 |
-
|
13 |
-
@dataclass
|
14 |
-
class CollaborativeModelArtifact:
|
15 |
-
svd_file_path: Optional[str] = None
|
16 |
-
item_based_knn_file_path: Optional[str] = None
|
17 |
-
user_based_knn_file_path: Optional[str] = None
|
18 |
-
|
19 |
-
@dataclass
|
20 |
-
class ContentBasedModelArtifact:
|
21 |
cosine_similarity_model_file_path:str
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
from typing import Optional
|
3 |
+
|
4 |
+
@dataclass
class DataIngestionArtifact:
    """Output of the data ingestion step: locations of the two stored files."""

    # Path of the anime file written to the feature store.
    feature_store_anime_file_path: str
    # Path of the user-rating file written to the feature store.
    feature_store_userrating_file_path: str
|
8 |
+
|
9 |
+
@dataclass
class DataTransformationArtifact:
    """Output of the data transformation step."""

    # Path of the merged file produced by the transformation.
    merged_file_path: str
|
12 |
+
|
13 |
+
@dataclass
class CollaborativeModelArtifact:
    """Paths of the collaborative models; each one is optional and defaults to None."""

    # Path of the SVD model file, if any.
    svd_file_path: Optional[str] = None
    # Path of the item-based KNN model file, if any.
    item_based_knn_file_path: Optional[str] = None
    # Path of the user-based KNN model file, if any.
    user_based_knn_file_path: Optional[str] = None
|
18 |
+
|
19 |
+
@dataclass
class ContentBasedModelArtifact:
    """Output of content-based model training."""

    # Path of the cosine-similarity model file.
    cosine_similarity_model_file_path: str
|
anime_recommender/entity/config_entity.py
CHANGED
@@ -1,66 +1,66 @@
|
|
1 |
-
import os
|
2 |
-
from datetime import datetime
|
3 |
-
from anime_recommender.constant import *
|
4 |
-
|
5 |
-
class TrainingPipelineConfig:
|
6 |
-
"""
|
7 |
-
Configuration for the training pipeline, including artifact directory and timestamp.
|
8 |
-
"""
|
9 |
-
def __init__(self, timestamp=datetime.now()):
|
10 |
-
"""
|
11 |
-
Initialize the configuration with a unique timestamp.
|
12 |
-
"""
|
13 |
-
timestamp = timestamp.strftime("%m_%d_%Y_%H_%M_%S")
|
14 |
-
self.pipeline_name = PIPELINE_NAME
|
15 |
-
self.artifact_dir = os.path.join(ARTIFACT_DIR, timestamp)
|
16 |
-
self.model_dir=os.path.join("final_model")
|
17 |
-
self.timestamp: str = timestamp
|
18 |
-
|
19 |
-
class DataIngestionConfig:
|
20 |
-
"""
|
21 |
-
Configuration for data ingestion, including paths for feature store, train, test, and validation files.
|
22 |
-
"""
|
23 |
-
def __init__(self, training_pipeline_config: TrainingPipelineConfig):
|
24 |
-
"""
|
25 |
-
Initialize data ingestion paths and parameters.
|
26 |
-
"""
|
27 |
-
self.data_ingestion_dir: str = os.path.join(training_pipeline_config.artifact_dir, DATA_INGESTION_DIR_NAME)
|
28 |
-
self.feature_store_anime_file_path: str = os.path.join(self.data_ingestion_dir, DATA_INGESTION_FEATURE_STORE_DIR, ANIME_FILE_NAME)
|
29 |
-
self.feature_store_userrating_file_path: str = os.path.join(self.data_ingestion_dir, DATA_INGESTION_FEATURE_STORE_DIR, RATING_FILE_NAME)
|
30 |
-
self.anime_filepath: str = ANIME_FILE_PATH
|
31 |
-
self.rating_filepath: str = RATING_FILE_PATH
|
32 |
-
|
33 |
-
class DataTransformationConfig:
|
34 |
-
"""
|
35 |
-
Configuration for data transformation, including paths for transformed data and preprocessing objects.
|
36 |
-
"""
|
37 |
-
def __init__(self,training_pipeline_config:TrainingPipelineConfig):
|
38 |
-
"""
|
39 |
-
Initialize data transformation paths.
|
40 |
-
"""
|
41 |
-
self.data_transformation_dir:str = os.path.join(training_pipeline_config.artifact_dir,DATA_TRANSFORMATION_DIR)
|
42 |
-
self.merged_file_path:str = os.path.join(self.data_transformation_dir,DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR,MERGED_FILE_NAME)
|
43 |
-
|
44 |
-
class CollaborativeModelConfig:
|
45 |
-
"""
|
46 |
-
Configuration for model training, including paths for trained models.
|
47 |
-
"""
|
48 |
-
def __init__(self,training_pipeline_config:TrainingPipelineConfig):
|
49 |
-
"""
|
50 |
-
Initialize model trainer paths.
|
51 |
-
"""
|
52 |
-
self.model_trainer_dir:str = os.path.join(training_pipeline_config.artifact_dir,MODEL_TRAINER_DIR_NAME)
|
53 |
-
self.svd_trained_model_file_path:str = os.path.join(self.model_trainer_dir,MODEL_TRAINER_COL_TRAINED_MODEL_DIR,MODEL_TRAINER_SVD_TRAINED_MODEL_NAME)
|
54 |
-
self.user_knn_trained_model_file_path:str = os.path.join(self.model_trainer_dir,MODEL_TRAINER_COL_TRAINED_MODEL_DIR,MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME)
|
55 |
-
self.item_knn_trained_model_file_path:str = os.path.join(self.model_trainer_dir,MODEL_TRAINER_COL_TRAINED_MODEL_DIR,MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME)
|
56 |
-
|
57 |
-
class ContentBasedModelConfig:
|
58 |
-
"""
|
59 |
-
Configuration for model training, including paths for trained models.
|
60 |
-
"""
|
61 |
-
def __init__(self,training_pipeline_config:TrainingPipelineConfig):
|
62 |
-
"""
|
63 |
-
Initialize model trainer paths.
|
64 |
-
"""
|
65 |
-
self.model_trainer_dir:str = os.path.join(training_pipeline_config.artifact_dir,MODEL_TRAINER_DIR_NAME)
|
66 |
self.cosine_similarity_model_file_path:str = os.path.join(self.model_trainer_dir,MODEL_TRAINER_CON_TRAINED_MODEL_DIR,MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME)
|
|
|
1 |
+
import os
|
2 |
+
from datetime import datetime
|
3 |
+
from anime_recommender.constant import *
|
4 |
+
|
5 |
+
class TrainingPipelineConfig:
    """
    Configuration for the training pipeline, including artifact directory and timestamp.
    """
    def __init__(self, timestamp=None):
        """
        Initialize the configuration with a unique timestamp.

        Args:
            timestamp (datetime, optional): Moment used to name the artifact
                directory. Defaults to the time at which this constructor is
                called.
        """
        # A `datetime.now()` default in the signature is evaluated only once,
        # when the module is imported, so every instance created in the same
        # process would share one timestamp (and one artifact directory).
        # Resolving it here gives each instance its own creation time.
        if timestamp is None:
            timestamp = datetime.now()
        timestamp = timestamp.strftime("%m_%d_%Y_%H_%M_%S")
        self.pipeline_name = PIPELINE_NAME
        self.artifact_dir = os.path.join(ARTIFACT_DIR, timestamp)
        self.model_dir = os.path.join("final_model")
        self.timestamp: str = timestamp
|
18 |
+
|
19 |
+
class DataIngestionConfig:
    """
    Configuration for data ingestion: feature-store destinations and source locations.
    """
    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        """
        Derive the ingestion paths from the pipeline's artifact directory.
        """
        ingestion_root = os.path.join(training_pipeline_config.artifact_dir, DATA_INGESTION_DIR_NAME)
        feature_store_root = os.path.join(ingestion_root, DATA_INGESTION_FEATURE_STORE_DIR)

        self.data_ingestion_dir: str = ingestion_root
        # Where the ingested files are stored inside the feature store.
        self.feature_store_anime_file_path: str = os.path.join(feature_store_root, ANIME_FILE_NAME)
        self.feature_store_userrating_file_path: str = os.path.join(feature_store_root, RATING_FILE_NAME)
        # Source locations, taken verbatim from the constants module.
        self.anime_filepath: str = ANIME_FILE_PATH
        self.rating_filepath: str = RATING_FILE_PATH
|
32 |
+
|
33 |
+
class DataTransformationConfig:
    """
    Configuration for data transformation: the step's directory and merged-file path.
    """
    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        """
        Derive the transformation paths from the pipeline's artifact directory.
        """
        transformation_root = os.path.join(training_pipeline_config.artifact_dir, DATA_TRANSFORMATION_DIR)
        transformed_dir = os.path.join(transformation_root, DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR)

        self.data_transformation_dir: str = transformation_root
        self.merged_file_path: str = os.path.join(transformed_dir, MERGED_FILE_NAME)
|
43 |
+
|
44 |
+
class CollaborativeModelConfig:
    """
    Configuration for collaborative model training: file paths of the three trained models.
    """
    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        """
        Derive the collaborative model paths from the pipeline's artifact directory.
        """
        trainer_root = os.path.join(training_pipeline_config.artifact_dir, MODEL_TRAINER_DIR_NAME)
        # All three collaborative models live in the same sub-directory.
        collaborative_dir = os.path.join(trainer_root, MODEL_TRAINER_COL_TRAINED_MODEL_DIR)

        self.model_trainer_dir: str = trainer_root
        self.svd_trained_model_file_path: str = os.path.join(collaborative_dir, MODEL_TRAINER_SVD_TRAINED_MODEL_NAME)
        self.user_knn_trained_model_file_path: str = os.path.join(collaborative_dir, MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME)
        self.item_knn_trained_model_file_path: str = os.path.join(collaborative_dir, MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME)
|
56 |
+
|
57 |
+
class ContentBasedModelConfig:
    """
    Configuration for content-based model training: file path of the cosine-similarity model.
    """
    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        """
        Derive the content-based model path from the pipeline's artifact directory.
        """
        trainer_root = os.path.join(training_pipeline_config.artifact_dir, MODEL_TRAINER_DIR_NAME)
        content_based_dir = os.path.join(trainer_root, MODEL_TRAINER_CON_TRAINED_MODEL_DIR)

        self.model_trainer_dir: str = trainer_root
        self.cosine_similarity_model_file_path: str = os.path.join(content_based_dir, MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME)
|
anime_recommender/exception/exception.py
CHANGED
@@ -1,44 +1,44 @@
|
|
1 |
-
import sys
|
2 |
-
|
3 |
-
class AnimeRecommendorException(Exception):
|
4 |
-
"""
|
5 |
-
Custom exception class for handling errors in the Energy Generation Prediction project.
|
6 |
-
|
7 |
-
This class captures the error message, file name, and line number where an exception occurred.
|
8 |
-
It is useful for debugging and identifying the source of the error in a structured way.
|
9 |
-
"""
|
10 |
-
def __init__(self,error_message, error_details:sys):
|
11 |
-
"""
|
12 |
-
Initialize the EnergyGenerationException instance.
|
13 |
-
|
14 |
-
Args:
|
15 |
-
error_message (str): The error message describing the exception.
|
16 |
-
error_details (sys): The sys module, used to extract exception details.
|
17 |
-
|
18 |
-
Attributes:
|
19 |
-
error_message (str): Stores the original error message.
|
20 |
-
lineno (int): The line number where the exception occurred.
|
21 |
-
file_name (str): The file name where the exception occurred.
|
22 |
-
"""
|
23 |
-
self.error_message = error_message
|
24 |
-
_,_,exc_tb = error_details.exc_info()
|
25 |
-
|
26 |
-
self.lineno = exc_tb.tb_lineno
|
27 |
-
self.file_name = exc_tb.tb_frame.f_code.co_filename
|
28 |
-
|
29 |
-
def __str__(self):
|
30 |
-
"""
|
31 |
-
Return the formatted error message.
|
32 |
-
|
33 |
-
Returns:
|
34 |
-
str: A string containing the file name, line number, and error message.
|
35 |
-
"""
|
36 |
-
return "Error occured in python script name [{0}] line number [{1}] error message [{2}]".format(
|
37 |
-
self.file_name,self.lineno, str(self.error_message))
|
38 |
-
|
39 |
-
if __name__=="__main__":
|
40 |
-
try:
|
41 |
-
a = 1/0 # This example will raise a ZeroDivisionError
|
42 |
-
print("This will not be printed",a)
|
43 |
-
except Exception as e:
|
44 |
raise AnimeRecommendorException(e,sys)
|
|
|
1 |
+
import sys
|
2 |
+
|
3 |
+
class AnimeRecommendorException(Exception):
    """
    Custom exception class for handling errors in the Anime Recommender project.

    This class captures the error message, file name, and line number where an exception occurred.
    It is useful for debugging and identifying the source of the error in a structured way.
    """
    def __init__(self, error_message, error_details=sys):
        """
        Initialize the AnimeRecommendorException instance.

        Args:
            error_message (str | Exception): The error message (or original exception) being reported.
            error_details (module): Object exposing ``exc_info()``; defaults to the ``sys`` module so
                that ``AnimeRecommendorException(e)`` works as well as ``AnimeRecommendorException(e, sys)``.

        Attributes:
            error_message: Stores the original error message.
            lineno (int | None): The line number where the exception occurred, or None when the
                exception is created while no other exception is being handled.
            file_name (str | None): The file name where the exception occurred, or None likewise.
        """
        # Populate Exception.args with the message only (not the sys module).
        super().__init__(error_message)
        self.error_message = error_message
        _, _, exc_tb = error_details.exc_info()

        if exc_tb is None:
            # Created outside an `except` block: there is no traceback to inspect.
            self.lineno = None
            self.file_name = None
        else:
            self.lineno = exc_tb.tb_lineno
            self.file_name = exc_tb.tb_frame.f_code.co_filename

    def __str__(self):
        """
        Return the formatted error message.

        Returns:
            str: A string containing the file name, line number, and error message.
        """
        return "Error occured in python script name [{0}] line number [{1}] error message [{2}]".format(
            self.file_name, self.lineno, str(self.error_message))
|
38 |
+
|
39 |
+
if __name__ == "__main__":
    # Manual smoke test: trigger a ZeroDivisionError and re-raise it wrapped
    # in the project exception so the formatted message can be inspected.
    try:
        quotient = 1 / 0  # This example will raise a ZeroDivisionError
        print("This will not be printed", quotient)
    except Exception as exc:
        raise AnimeRecommendorException(exc, sys)
|
anime_recommender/loggers/logging.py
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
-
import os
|
2 |
-
import logging
|
3 |
-
from datetime import datetime
|
4 |
-
|
5 |
-
LOGS_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"
|
6 |
-
|
7 |
-
logs_dir = os.path.join(os.getcwd(), "logs")
|
8 |
-
os.makedirs(logs_dir, exist_ok=True)
|
9 |
-
|
10 |
-
LOGS_FILE_PATH = os.path.join(logs_dir,LOGS_FILE)
|
11 |
-
|
12 |
-
logging.basicConfig(
|
13 |
-
filename= LOGS_FILE_PATH,
|
14 |
-
format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
|
15 |
-
level= logging.INFO,
|
16 |
)
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
from datetime import datetime
|
4 |
+
|
5 |
+
# The log file is named after the moment this module is first imported.
LOGS_FILE = datetime.now().strftime('%m_%d_%Y_%H_%M_%S') + ".log"

# Logs are collected in a "logs" folder under the current working directory.
logs_dir = os.path.join(os.getcwd(), "logs")
os.makedirs(logs_dir, exist_ok=True)

LOGS_FILE_PATH = os.path.join(logs_dir, LOGS_FILE)

# Configure the root logger to write INFO-and-above records to that file.
logging.basicConfig(
    level=logging.INFO,
    format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
    filename=LOGS_FILE_PATH,
)
|
anime_recommender/model_trainer/collaborative_modelling.py
CHANGED
@@ -1,183 +1,263 @@
|
|
1 |
-
import
|
2 |
-
|
3 |
-
from anime_recommender.
|
4 |
-
|
5 |
-
|
6 |
-
from surprise import
|
7 |
-
from
|
8 |
-
from
|
9 |
-
from
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
recommendations
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
recommendations.
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from anime_recommender.loggers.logging import logging
|
3 |
+
from anime_recommender.exception.exception import AnimeRecommendorException
|
4 |
+
|
5 |
+
from surprise import Reader, Dataset, SVD
|
6 |
+
from surprise.model_selection import cross_validate
|
7 |
+
from scipy.sparse import csr_matrix
|
8 |
+
from sklearn.neighbors import NearestNeighbors
|
9 |
+
from collections import Counter
|
10 |
+
|
11 |
+
class CollaborativeAnimeRecommender:
    """
    A collaborative filtering-based anime recommender system that supports:
    - Singular Value Decomposition (SVD)
    - Item-based KNN
    - User-based KNN
    """
    def __init__(self, df):
        """
        Initializes the recommender system with a given dataset.

        Args:
            df (pd.DataFrame): DataFrame containing anime ratings with 'user_id', 'anime_id', 'rating', etc.

        Raises:
            AnimeRecommendorException: If data preparation fails.
        """
        try:
            logging.info("Initializing CollaborativeAnimeRecommender")
            self.df = df
            self.svd = None
            self.knn_item_based = None
            self.knn_user_based = None
            self.prepare_data()
        except Exception as e:
            raise self._wrap_error(e)

    @staticmethod
    def _wrap_error(error):
        """Return `error` as an AnimeRecommendorException (unchanged if it already is one)."""
        # Local import: this module has no file-level `import sys`, and the
        # exception constructor needs it to read the active traceback.
        import sys
        if isinstance(error, AnimeRecommendorException):
            return error
        return AnimeRecommendorException(error, sys)

    def _lookup_details(self, key_column, ordered_keys):
        """Return one row of self.df per key, in the order given by `ordered_keys`."""
        catalogue = self.df.drop_duplicates(subset=key_column).set_index(key_column, drop=False)
        present = [key for key in ordered_keys if key in catalogue.index]
        return catalogue.loc[present]

    def prepare_data(self):
        """
        Prepares data for training.

        Drops duplicate rows, builds the Surprise dataset from the
        'user_id'/'anime_id'/'rating' columns and builds the two zero-filled
        rating pivot tables (anime x user and user x anime) used by the KNN models.
        """
        try:
            self.df = self.df.drop_duplicates()
            reader = Reader(rating_scale=(1, 10))
            self.data = Dataset.load_from_df(self.df[['user_id', 'anime_id', 'rating']], reader)
            self.anime_pivot = self.df.pivot_table(index='name', columns='user_id', values='rating').fillna(0)
            self.user_pivot = self.df.pivot_table(index='user_id', columns='name', values='rating').fillna(0)
            logging.info("Data preparation completed...")
        except Exception as e:
            raise self._wrap_error(e)

    def train_svd(self):
        """
        Trains the Singular Value Decomposition (SVD) model using Surprise.

        Runs a 5-fold cross-validation (results are logged), then fits the
        model on the full training set.
        """
        try:
            logging.info("Training SVD model")
            self.svd = SVD()
            cv_results = cross_validate(self.svd, self.data, cv=5)
            logging.info(f"SVD cross-validation results: {cv_results}")
            trainset = self.data.build_full_trainset()
            self.svd.fit(trainset)
            logging.info("SVD model training completed")
        except Exception as e:
            raise self._wrap_error(e)

    def train_knn_item_based(self):
        """
        Trains an item-based KNN model using cosine similarity.
        """
        try:
            logging.info("Training KNN model")
            item_user_matrix = csr_matrix(self.anime_pivot.values)
            self.knn_item_based = NearestNeighbors(metric='cosine', algorithm='brute')
            self.knn_item_based.fit(item_user_matrix)
            logging.info("KNN model training completed")
        except Exception as e:
            raise self._wrap_error(e)

    def train_knn_user_based(self):
        """Train the KNN model for user-based recommendations."""
        try:
            logging.info("Training KNN model")
            user_item_matrix = csr_matrix(self.user_pivot.values)
            self.knn_user_based = NearestNeighbors(metric='cosine', algorithm='brute')
            self.knn_user_based.fit(user_item_matrix)
            logging.info("KNN model training completed")
        except Exception as e:
            raise self._wrap_error(e)

    def print_unique_user_ids(self):
        """
        Logs and returns unique user IDs in the dataset.

        Returns:
            np.ndarray: Array of unique user IDs.
        """
        try:
            unique_user_ids = self.df['user_id'].unique()
            logging.info(f"Unique User IDs: {unique_user_ids}")
            return unique_user_ids
        except Exception as e:
            raise self._wrap_error(e)

    def get_svd_recommendations(self, user_id, n=10, svd_model=None)-> pd.DataFrame:
        """
        Generates anime recommendations using the trained SVD model.

        Args:
            user_id (int): The user ID for which recommendations are generated.
            n (int): Number of recommendations to return. Default is 10.
            svd_model (SVD, optional): Pretrained SVD model. Uses self.svd if not provided.

        Returns:
            pd.DataFrame: Recommended anime details, ordered from highest to
            lowest predicted rating. If the user is unknown, a message string
            is returned instead.

        Raises:
            AnimeRecommendorException: If no model is available or prediction fails.
        """
        try:
            # Use the provided SVD model or the trained self.svd model
            svd_model = svd_model if svd_model is not None else self.svd
            if svd_model is None:
                raise ValueError("SVD model is not provided or trained.")

            # Ensure user exists in the dataset
            if user_id not in self.df['user_id'].unique():
                return f"User ID '{user_id}' not found in the dataset."

            # Predict ratings for all anime for the given user
            anime_ids = self.df['anime_id'].unique()
            predictions = [(anime_id, svd_model.predict(user_id, anime_id).est) for anime_id in anime_ids]
            predictions.sort(key=lambda x: x[1], reverse=True)

            # Extract top N anime IDs, best prediction first
            recommended_anime_ids = [pred[0] for pred in predictions[:n]]

            # Fetch details in ranking order (a plain isin() filter would
            # return them in dataset order and lose the ranking).
            recommended_anime = self._lookup_details('anime_id', recommended_anime_ids)
            logging.info(f"Shape of recommended_anime: {recommended_anime.shape}")

            return pd.DataFrame({
                'Anime Name': recommended_anime['name'].values,
                'Genres': recommended_anime['genres'].values,
                'Image URL': recommended_anime['image url'].values,
                'Rating': recommended_anime['average_rating'].values
            })
        except Exception as e:
            raise self._wrap_error(e)

    def get_item_based_recommendations(self, anime_name, n_recommendations=10, knn_item_model=None):
        """
        Get item-based recommendations for a given anime using a KNN model.

        Args:
            anime_name (str): The title of the anime for which recommendations are needed.
            n_recommendations (int): The number of recommendations to return. Defaults to 10.
            knn_item_model (NearestNeighbors): A trained KNN model. Defaults to None, in which case self.knn_item_based is used.

        Returns:
            pd.DataFrame: Recommended anime names, genres, image URLs, and
            ratings, ordered from most to least similar. If the title is
            unknown, a message string is returned instead.

        Raises:
            AnimeRecommendorException: If no model is available or the lookup fails.
        """
        try:
            # Use the provided model or fall back to self.knn_item_based
            knn_item_based = knn_item_model if knn_item_model is not None else self.knn_item_based
            if knn_item_based is None:
                raise ValueError("Item-based KNN model is not provided or trained.")

            # Ensure the anime name exists in the pivot table
            if anime_name not in self.anime_pivot.index:
                return f"Anime title '{anime_name}' not found in the dataset."

            query_index = self.anime_pivot.index.get_loc(anime_name)

            # Ask for one extra neighbour because the query anime itself is
            # among the results; never ask for more rows than the pivot has.
            distances, indices = knn_item_based.kneighbors(
                self.anime_pivot.iloc[query_index, :].values.reshape(1, -1),
                n_neighbors=min(n_recommendations + 1, len(self.anime_pivot))
            )

            # Drop the query by index rather than by position: with tied
            # distances it is not guaranteed to be the first neighbour.
            neighbour_positions = [pos for pos in indices.flatten() if pos != query_index]
            recommended_anime_titles = [
                self.anime_pivot.index[pos] for pos in neighbour_positions[:n_recommendations]
            ]
            logging.info(f"Top {n_recommendations} recommendations: {recommended_anime_titles}")

            # Fetch details in similarity order.
            filtered_df = self._lookup_details('name', recommended_anime_titles)
            logging.info(f"Shape of filtered df: {filtered_df.shape}")

            return pd.DataFrame({
                'Anime Name': filtered_df['name'].values,
                'Image URL': filtered_df['image url'].values,
                'Genres': filtered_df['genres'].values,
                'Rating': filtered_df['average_rating'].values
            })
        except Exception as e:
            raise self._wrap_error(e)

    def get_user_based_recommendations(self, user_id, n_recommendations=10, knn_user_model=None)-> pd.DataFrame:
        """
        Recommend anime for a given user based on similar users' preferences using the provided or trained KNN model.

        Args:
            user_id (int): The ID of the user.
            n_recommendations (int): Number of recommendations to return.
            knn_user_model (NearestNeighbors): Pre-trained KNN model. Defaults to None.

        Returns:
            pd.DataFrame: Recommended anime titles and related information,
            ordered by how many neighbouring users rated each title. If the
            user is unknown, a message string is returned instead.

        Raises:
            AnimeRecommendorException: If no model is available or the lookup fails.
        """
        try:
            # Use the provided model or fall back to self.knn_user_based
            knn_user_based = knn_user_model if knn_user_model is not None else self.knn_user_based
            if knn_user_based is None:
                raise ValueError("User-based KNN model is not provided or trained.")

            # Ensure the user exists in the pivot table
            user_id = float(user_id)
            if user_id not in self.user_pivot.index:
                return f"User ID '{user_id}' not found in the dataset."

            user_idx = self.user_pivot.index.get_loc(user_id)
            user_row = self.user_pivot.iloc[user_idx, :]

            # One extra neighbour because the user itself is among the results.
            distances, indices = knn_user_based.kneighbors(
                user_row.values.reshape(1, -1),
                n_neighbors=min(n_recommendations + 1, len(self.user_pivot))
            )

            # Anime the user has already rated (a zero in the pivot means "not rated")
            user_rated_anime = set(self.user_pivot.columns[user_row > 0])

            # Count how often each anime is rated by the neighbours, skipping
            # the user itself by index rather than by position.
            anime_counter = Counter()
            for neighbor_idx in indices.flatten():
                if neighbor_idx == user_idx:
                    continue
                neighbor_row = self.user_pivot.iloc[neighbor_idx, :]
                anime_counter.update(neighbor_row[neighbor_row > 0].index)

            # Recommend anime not already rated by the user, most frequent first
            recommendations = [(anime, count) for anime, count in anime_counter.items() if anime not in user_rated_anime]
            recommendations.sort(key=lambda x: x[1], reverse=True)
            recommended_anime_titles = [rec[0] for rec in recommendations[:n_recommendations]]

            # Fetch details in ranking order.
            filtered_df = self._lookup_details('name', recommended_anime_titles)
            logging.info(f"Shape of filtered df: {filtered_df.shape}")

            return pd.DataFrame({
                'Anime Name': filtered_df['name'].values,
                'Image URL': filtered_df['image url'].values,
                'Genres': filtered_df['genres'].values,
                'Rating': filtered_df['average_rating'].values
            })
        except Exception as e:
            raise self._wrap_error(e)
|
anime_recommender/model_trainer/content_based_modelling.py
CHANGED
@@ -1,71 +1,73 @@
|
|
1 |
-
import os
|
2 |
-
import pandas as pd
|
3 |
-
from sklearn.feature_extraction.text import TfidfVectorizer
|
4 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
5 |
-
import joblib
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
self.df = df.dropna()
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
)
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
# Load the model (TF-IDF and cosine similarity matrix)
|
48 |
-
with open(model_path, 'rb') as f:
|
49 |
-
self.tfv, self.cosine_sim = joblib.load(f)
|
50 |
-
|
51 |
-
# Check if the DataFrame is loaded
|
52 |
-
if self.df is None:
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
'
|
67 |
-
'
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
71 |
|
|
|
1 |
+
import os
|
2 |
+
import pandas as pd
|
3 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
4 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
5 |
+
import joblib
|
6 |
+
from anime_recommender.loggers.logging import logging
|
7 |
+
from anime_recommender.exception.exception import AnimeRecommendorException
|
8 |
+
|
9 |
+
class ContentBasedRecommender:
    """
    A content-based recommender system using TF-IDF Vectorizer and Cosine Similarity.
    """
    def __init__(self, df):
        """
        Fit a TF-IDF model on the 'genres' column and compute pairwise cosine similarity.

        Args:
            df (pd.DataFrame): Anime data; the code reads the 'name', 'genres',
                'image url' and 'average_rating' columns. Rows containing any
                missing value are dropped.

        Raises:
            AnimeRecommendorException: If fitting fails.
        """
        try:
            # Re-number rows after dropping NaNs: the similarity matrix is
            # addressed by position, so index labels must equal positions.
            self.df = df.dropna().reset_index(drop=True)
            # Map each anime name to its row position; for repeated names keep
            # the first row so a lookup always yields a single position.
            name_to_position = pd.Series(self.df.index, index=self.df['name'])
            self.indices = name_to_position[~name_to_position.index.duplicated(keep='first')]
            # Initialize and fit the TF-IDF Vectorizer on the 'genres' column
            self.tfv = TfidfVectorizer(
                min_df=3,
                strip_accents='unicode',
                analyzer='word',
                token_pattern=r'\w{1,}',
                ngram_range=(1, 3),
                stop_words='english'
            )
            self.tfv_matrix = self.tfv.fit_transform(self.df['genres'])
            self.cosine_sim = cosine_similarity(self.tfv_matrix, self.tfv_matrix)
        except Exception as e:
            raise self._wrap_error(e)

    @staticmethod
    def _wrap_error(error):
        """Return `error` as an AnimeRecommendorException (unchanged if it already is one)."""
        # Local import: this module has no file-level `import sys`, and the
        # exception constructor needs it to read the active traceback.
        import sys
        if isinstance(error, AnimeRecommendorException):
            return error
        return AnimeRecommendorException(error, sys)

    def _recommend(self, title, n_recommendations):
        """Return the `n_recommendations` rows most similar to `title`, excluding `title` itself."""
        idx = int(self.indices[title])
        # Exclude the query by position instead of assuming it sorts first:
        # titles with identical genres tie at the top score.
        candidates = [pair for pair in enumerate(self.cosine_sim[idx]) if pair[0] != idx]
        candidates.sort(key=lambda x: x[1], reverse=True)
        anime_indices = [pair[0] for pair in candidates[:n_recommendations]]
        return pd.DataFrame({
            'Anime name': self.df['name'].iloc[anime_indices].values,
            'Image URL': self.df['image url'].iloc[anime_indices].values,
            'Genres': self.df['genres'].iloc[anime_indices].values,
            'Rating': self.df['average_rating'].iloc[anime_indices].values
        })

    def save_model(self, model_path):
        """Save the trained model (TF-IDF and Cosine Similarity Matrix) to a file."""
        try:
            logging.info(f"Saving model to {model_path}")
            target_dir = os.path.dirname(model_path)
            if target_dir:
                # A bare file name has no directory part; os.makedirs('') would fail.
                os.makedirs(target_dir, exist_ok=True)
            with open(model_path, 'wb') as f:
                joblib.dump((self.tfv, self.cosine_sim), f)
            logging.info("Content recommender Model saved successfully")
        except Exception as e:
            raise self._wrap_error(e)

    def get_rec_cosine(self, title, model_path, n_recommendations=5):
        """
        Get recommendations based on cosine similarity for a given anime title.

        Args:
            title (str): Anime title to find similar titles for.
            model_path (str): File written by `save_model`; it is reloaded on every call.
            n_recommendations (int): Number of recommendations to return. Defaults to 5.

        Returns:
            pd.DataFrame: Name, image URL, genres and rating of the most
            similar titles. If the title is unknown, a message string is
            returned instead.

        Raises:
            AnimeRecommendorException: If the model cannot be loaded or the lookup fails.
        """
        try:
            logging.info(f"Loading model from {model_path}")
            # Load the model (TF-IDF and cosine similarity matrix)
            # NOTE: joblib.load unpickles the file -- only load trusted model files.
            with open(model_path, 'rb') as f:
                self.tfv, self.cosine_sim = joblib.load(f)
            logging.info("Model loaded successfully")
            # Check if the DataFrame is loaded
            if self.df is None:
                logging.error("The DataFrame is not loaded, cannot make recommendations.")
                raise ValueError("The DataFrame is not loaded, cannot make recommendations.")

            if title not in self.indices.index:
                logging.warning(f"Anime title '{title}' not found in dataset")
                return f"Anime title '{title}' not found in the dataset."

            recommendations = self._recommend(title, n_recommendations)
            logging.info("Recommendations generated successfully")
            return recommendations
        except Exception as e:
            raise self._wrap_error(e)
|
73 |
|
anime_recommender/model_trainer/top_anime_filtering.py
CHANGED
@@ -1,93 +1,104 @@
|
|
1 |
-
import sys
|
2 |
-
import numpy as np
|
3 |
-
import pandas as pd
|
4 |
-
from anime_recommender.
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
'
|
20 |
-
'
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
def
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
sorted_df = self.df.sort_values(by=['members'
|
69 |
-
|
70 |
-
return
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
)
|
88 |
-
|
89 |
-
|
90 |
-
'
|
91 |
-
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
})
|
|
|
1 |
+
import sys
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
from anime_recommender.loggers.logging import logging
|
5 |
+
from anime_recommender.exception.exception import AnimeRecommendorException
|
6 |
+
|
7 |
+
class PopularityBasedFiltering:
    """
    A recommender system that filters popular animes based on different
    criteria such as popularity, rank, average rating, number of members,
    and favorites.

    The DataFrame passed to the constructor is stored by reference and is
    modified in place ('average_rating' and, on demand, 'rank' are coerced
    to numeric columns).
    """

    def __init__(self, df):
        """
        Initialize the PopularityBasedFiltering class with a DataFrame.

        Args:
            df: DataFrame that provides the columns read by the methods of
                this class ('name', 'image url', 'genres', 'average_rating',
                'popularity', 'rank', 'favorites', 'members').

        Raises:
            AnimeRecommendorException: If the rating column cannot be cleaned.
        """
        try:
            logging.info("Initializing PopularityBasedFiltering class")
            self.df = df
            self._clean_average_rating()
        except Exception as e:
            logging.error("Error initializing PopularityBasedFiltering: %s", str(e))
            raise AnimeRecommendorException(e, sys) from e

    def _clean_average_rating(self):
        """Coerce 'average_rating' to numeric and fill missing values with the median."""
        ratings = pd.to_numeric(self.df['average_rating'], errors='coerce')
        # fillna returns a new Series; the result has to be assigned back,
        # otherwise the missing ratings stay NaN.
        self.df['average_rating'] = ratings.fillna(ratings.median())

    def popular_animes(self, n=10):
        """
        Get the top N most popular animes (smallest 'popularity' value first).
        """
        logging.info("Fetching top %d most popular animes", n)
        sorted_df = self.df.sort_values(by=['popularity'], ascending=True)
        return self._format_output(sorted_df.head(n))

    def top_ranked_animes(self, n=10):
        """
        Get the top N ranked animes (smallest 'rank' value first).

        The 'rank' column is converted to float in place; values that are
        not numeric (such as 'UNKNOWN') become NaN and are filtered out.
        """
        logging.info("Fetching top %d ranked animes", n)
        self.df['rank'] = pd.to_numeric(self.df['rank'], errors='coerce').astype(float)
        # NOTE(review): the strict '> 1' also drops the anime ranked exactly 1;
        # kept as in the original — confirm whether '>= 1' was intended.
        df_filtered = self.df[self.df['rank'] > 1]
        sorted_df = df_filtered.sort_values(by=['rank'], ascending=True)
        return self._format_output(sorted_df.head(n))

    def overall_top_rated_animes(self, n=10):
        """
        Get the top N highest-rated animes.
        """
        logging.info("Fetching top %d highest-rated animes", n)
        sorted_df = self.df.sort_values(by=['average_rating'], ascending=False)
        return self._format_output(sorted_df.head(n))

    def favorite_animes(self, n=10):
        """
        Get the top N most favorited animes.
        """
        logging.info("Fetching top %d most favorited animes", n)
        sorted_df = self.df.sort_values(by=['favorites'], ascending=False)
        return self._format_output(sorted_df.head(n))

    def top_animes_members(self, n=10):
        """
        Get the top N animes based on the number of members.
        """
        logging.info("Fetching top %d animes based on number of members", n)
        sorted_df = self.df.sort_values(by=['members'], ascending=False)
        return self._format_output(sorted_df.head(n))

    def popular_anime_among_members(self, n=10):
        """
        Get the top N animes popular among members based on the highest
        number of members and ratings; duplicate names are removed after
        sorting so the best-placed row of each name is kept.
        """
        logging.info("Fetching top %d popular animes among members", n)
        sorted_df = self.df.sort_values(
            by=['members', 'average_rating'], ascending=[False, False]
        ).drop_duplicates(subset='name')
        return self._format_output(sorted_df.head(n))

    def top_avg_rated(self, n=10):
        """
        Get the top N highest-rated animes after handling missing values
        (missing ratings are replaced by the median) and removing
        duplicate names.
        """
        logging.info("Fetching top %d highest average-rated animes", n)
        self._clean_average_rating()
        top_animes = (
            self.df.drop_duplicates(subset='name')
            .nlargest(n, 'average_rating')[['name', 'average_rating', 'image url', 'genres']]
        )
        return self._format_output(top_animes)

    def _format_output(self, anime_df):
        """
        Format the output as a DataFrame with selected anime attributes.

        Returns:
            A new DataFrame with the columns 'Anime name', 'Image URL',
            'Genres' and 'Rating' and a fresh default index.
        """
        return pd.DataFrame({
            'Anime name': anime_df['name'].values,
            'Image URL': anime_df['image url'].values,
            'Genres': anime_df['genres'].values,
            'Rating': anime_df['average_rating'].values
        })
|
anime_recommender/pipelines/training_pipeline.py
CHANGED
@@ -1,152 +1,152 @@
|
|
1 |
-
import sys
|
2 |
-
from anime_recommender.loggers.logging import logging
|
3 |
-
from anime_recommender.exception.exception import AnimeRecommendorException
|
4 |
-
|
5 |
-
from anime_recommender.
|
6 |
-
from anime_recommender.
|
7 |
-
from anime_recommender.
|
8 |
-
from anime_recommender.
|
9 |
-
from anime_recommender.
|
10 |
-
from anime_recommender.entity.config_entity import (
|
11 |
-
TrainingPipelineConfig,
|
12 |
-
DataIngestionConfig,
|
13 |
-
DataTransformationConfig,
|
14 |
-
CollaborativeModelConfig,
|
15 |
-
ContentBasedModelConfig,
|
16 |
-
)
|
17 |
-
from anime_recommender.entity.artifact_entity import (
|
18 |
-
DataIngestionArtifact,
|
19 |
-
DataTransformationArtifact,
|
20 |
-
CollaborativeModelArtifact,
|
21 |
-
ContentBasedModelArtifact,
|
22 |
-
)
|
23 |
-
|
24 |
-
class TrainingPipeline:
|
25 |
-
"""
|
26 |
-
Orchestrates the entire anime recommender training pipeline, including
|
27 |
-
data ingestion, transformation, model training, and popularity-based recommendations.
|
28 |
-
"""
|
29 |
-
def __init__(self):
|
30 |
-
"""
|
31 |
-
Initialize the TrainingPipeline with required configurations.
|
32 |
-
"""
|
33 |
-
self.training_pipeline_config = TrainingPipelineConfig()
|
34 |
-
|
35 |
-
def start_data_ingestion(self) -> DataIngestionArtifact:
|
36 |
-
"""
|
37 |
-
Starts the data ingestion process.
|
38 |
-
Returns:
|
39 |
-
DataIngestionArtifact: Contains information about ingested data.
|
40 |
-
"""
|
41 |
-
try:
|
42 |
-
logging.info("Initiating Data Ingestion...")
|
43 |
-
data_ingestion_config = DataIngestionConfig(self.training_pipeline_config)
|
44 |
-
data_ingestion = DataIngestion(data_ingestion_config=data_ingestion_config)
|
45 |
-
data_ingestion_artifact = data_ingestion.ingest_data()
|
46 |
-
logging.info(f"Data Ingestion completed: {data_ingestion_artifact}")
|
47 |
-
return data_ingestion_artifact
|
48 |
-
except Exception as e:
|
49 |
-
raise AnimeRecommendorException(e, sys)
|
50 |
-
|
51 |
-
def start_data_transformation(self, data_ingestion_artifact: DataIngestionArtifact) -> DataTransformationArtifact:
|
52 |
-
"""
|
53 |
-
Starts the data transformation process.
|
54 |
-
Returns:
|
55 |
-
DataTransformationArtifact: Contains transformed data.
|
56 |
-
"""
|
57 |
-
try:
|
58 |
-
logging.info("Initiating Data Transformation...")
|
59 |
-
data_transformation_config = DataTransformationConfig(self.training_pipeline_config)
|
60 |
-
data_transformation = DataTransformation(
|
61 |
-
data_ingestion_artifact=data_ingestion_artifact,
|
62 |
-
data_transformation_config=data_transformation_config
|
63 |
-
)
|
64 |
-
data_transformation_artifact = data_transformation.initiate_data_transformation()
|
65 |
-
logging.info(f"Data Transformation completed: {data_transformation_artifact}")
|
66 |
-
return data_transformation_artifact
|
67 |
-
except Exception as e:
|
68 |
-
raise AnimeRecommendorException(e, sys)
|
69 |
-
|
70 |
-
def start_collaborative_model_training(self, data_transformation_artifact: DataTransformationArtifact) -> CollaborativeModelArtifact:
|
71 |
-
"""
|
72 |
-
Starts collaborative filtering model training.
|
73 |
-
Returns:
|
74 |
-
CollaborativeModelTrainerArtifact: Trained collaborative model artifact.
|
75 |
-
"""
|
76 |
-
try:
|
77 |
-
logging.info("Initiating Collaborative Model Training...")
|
78 |
-
collaborative_model_config = CollaborativeModelConfig(self.training_pipeline_config)
|
79 |
-
collaborative_model_trainer = CollaborativeModelTrainer(
|
80 |
-
collaborative_model_trainer_config=collaborative_model_config,
|
81 |
-
data_transformation_artifact=data_transformation_artifact
|
82 |
-
)
|
83 |
-
collaborative_model_trainer_artifact = collaborative_model_trainer.initiate_model_trainer(model_type='user_knn')
|
84 |
-
logging.info(f"Collaborative Model Training completed: {collaborative_model_trainer_artifact}")
|
85 |
-
return collaborative_model_trainer_artifact
|
86 |
-
except Exception as e:
|
87 |
-
raise AnimeRecommendorException(e, sys)
|
88 |
-
|
89 |
-
def start_content_based_model_training(self, data_ingestion_artifact: DataIngestionArtifact) -> ContentBasedModelArtifact:
|
90 |
-
"""
|
91 |
-
Starts content-based filtering model training.
|
92 |
-
Returns:
|
93 |
-
ContentBasedModelTrainerArtifact: Trained content-based model artifact.
|
94 |
-
"""
|
95 |
-
try:
|
96 |
-
logging.info("Initiating Content-Based Model Training...")
|
97 |
-
content_based_model_config = ContentBasedModelConfig(self.training_pipeline_config)
|
98 |
-
content_based_model_trainer = ContentBasedModelTrainer(
|
99 |
-
content_based_model_trainer_config=content_based_model_config,
|
100 |
-
data_ingestion_artifact=data_ingestion_artifact
|
101 |
-
)
|
102 |
-
content_based_model_trainer_artifact = content_based_model_trainer.initiate_model_trainer()
|
103 |
-
logging.info(f"Content-Based Model Training completed: {content_based_model_trainer_artifact}")
|
104 |
-
return content_based_model_trainer_artifact
|
105 |
-
except Exception as e:
|
106 |
-
raise AnimeRecommendorException(e, sys)
|
107 |
-
|
108 |
-
def start_popularity_based_filtering(self, data_ingestion_artifact: DataIngestionArtifact):
|
109 |
-
"""
|
110 |
-
Generates popularity-based recommendations.
|
111 |
-
"""
|
112 |
-
try:
|
113 |
-
logging.info("Initiating Popularity-Based Filtering...")
|
114 |
-
filtering = PopularityBasedRecommendor(data_ingestion_artifact=data_ingestion_artifact)
|
115 |
-
recommendations = filtering.initiate_model_trainer(filter_type='popular_animes')
|
116 |
-
logging.info("Popularity-Based Filtering completed.")
|
117 |
-
return recommendations
|
118 |
-
except Exception as e:
|
119 |
-
raise AnimeRecommendorException(e, sys)
|
120 |
-
|
121 |
-
def run_pipeline(self):
|
122 |
-
"""
|
123 |
-
Executes the entire training pipeline.
|
124 |
-
"""
|
125 |
-
try:
|
126 |
-
# Data Ingestion
|
127 |
-
data_ingestion_artifact = self.start_data_ingestion()
|
128 |
-
|
129 |
-
# Data Transformation
|
130 |
-
data_transformation_artifact = self.start_data_transformation(data_ingestion_artifact)
|
131 |
-
|
132 |
-
# Collaborative Model Training
|
133 |
-
collaborative_model_trainer_artifact = self.start_collaborative_model_training(data_transformation_artifact)
|
134 |
-
|
135 |
-
# Content-Based Model Training
|
136 |
-
content_based_model_trainer_artifact = self.start_content_based_model_training(data_ingestion_artifact)
|
137 |
-
|
138 |
-
# Popularity-Based Filtering
|
139 |
-
popularity_recommendations = self.start_popularity_based_filtering(data_ingestion_artifact)
|
140 |
-
|
141 |
-
logging.info("Training Pipeline executed successfully.")
|
142 |
-
except Exception as e:
|
143 |
-
raise AnimeRecommendorException(e, sys)
|
144 |
-
|
145 |
-
|
146 |
-
if __name__ == "__main__":
|
147 |
-
try:
|
148 |
-
pipeline = TrainingPipeline()
|
149 |
-
pipeline.run_pipeline()
|
150 |
-
except Exception as e:
|
151 |
-
logging.error(f"Pipeline execution failed: {str(e)}")
|
152 |
raise AnimeRecommendorException(e, sys)
|
|
|
1 |
+
import sys
|
2 |
+
from anime_recommender.loggers.logging import logging
|
3 |
+
from anime_recommender.exception.exception import AnimeRecommendorException
|
4 |
+
|
5 |
+
from anime_recommender.components.data_ingestion import DataIngestion
|
6 |
+
from anime_recommender.components.data_transformation import DataTransformation
|
7 |
+
from anime_recommender.components.collaborative_recommender import CollaborativeModelTrainer
|
8 |
+
from anime_recommender.components.content_based_recommender import ContentBasedModelTrainer
|
9 |
+
from anime_recommender.components.top_anime_recommenders import PopularityBasedRecommendor
|
10 |
+
from anime_recommender.entity.config_entity import (
|
11 |
+
TrainingPipelineConfig,
|
12 |
+
DataIngestionConfig,
|
13 |
+
DataTransformationConfig,
|
14 |
+
CollaborativeModelConfig,
|
15 |
+
ContentBasedModelConfig,
|
16 |
+
)
|
17 |
+
from anime_recommender.entity.artifact_entity import (
|
18 |
+
DataIngestionArtifact,
|
19 |
+
DataTransformationArtifact,
|
20 |
+
CollaborativeModelArtifact,
|
21 |
+
ContentBasedModelArtifact,
|
22 |
+
)
|
23 |
+
|
24 |
+
class TrainingPipeline:
    """
    Drives the anime recommender training run: data ingestion, data
    transformation, collaborative and content-based model training, and
    popularity-based filtering, in that order.
    """

    def __init__(self):
        """
        Create the TrainingPipelineConfig that every stage config is built from.
        """
        self.training_pipeline_config = TrainingPipelineConfig()

    def start_data_ingestion(self) -> DataIngestionArtifact:
        """
        Run the data ingestion stage.

        Returns:
            DataIngestionArtifact: The value returned by ``DataIngestion.ingest_data()``.

        Raises:
            AnimeRecommendorException: Wraps any error raised by the stage.
        """
        try:
            logging.info("Initiating Data Ingestion...")
            stage_config = DataIngestionConfig(self.training_pipeline_config)
            ingestion_artifact = DataIngestion(
                data_ingestion_config=stage_config
            ).ingest_data()
            logging.info(f"Data Ingestion completed: {ingestion_artifact}")
            return ingestion_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    def start_data_transformation(self, data_ingestion_artifact: DataIngestionArtifact) -> DataTransformationArtifact:
        """
        Run the data transformation stage on the ingested data.

        Args:
            data_ingestion_artifact: Output of ``start_data_ingestion``.

        Returns:
            DataTransformationArtifact: The value returned by
            ``DataTransformation.initiate_data_transformation()``.

        Raises:
            AnimeRecommendorException: Wraps any error raised by the stage.
        """
        try:
            logging.info("Initiating Data Transformation...")
            stage_config = DataTransformationConfig(self.training_pipeline_config)
            transformer = DataTransformation(
                data_ingestion_artifact=data_ingestion_artifact,
                data_transformation_config=stage_config,
            )
            transformation_artifact = transformer.initiate_data_transformation()
            logging.info(f"Data Transformation completed: {transformation_artifact}")
            return transformation_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    def start_collaborative_model_training(self, data_transformation_artifact: DataTransformationArtifact) -> CollaborativeModelArtifact:
        """
        Train the collaborative filtering model with ``model_type='user_knn'``.

        Args:
            data_transformation_artifact: Output of ``start_data_transformation``.

        Returns:
            CollaborativeModelArtifact: The value returned by
            ``CollaborativeModelTrainer.initiate_model_trainer``.

        Raises:
            AnimeRecommendorException: Wraps any error raised by the stage.
        """
        try:
            logging.info("Initiating Collaborative Model Training...")
            stage_config = CollaborativeModelConfig(self.training_pipeline_config)
            trainer = CollaborativeModelTrainer(
                collaborative_model_trainer_config=stage_config,
                data_transformation_artifact=data_transformation_artifact,
            )
            model_artifact = trainer.initiate_model_trainer(model_type='user_knn')
            logging.info(f"Collaborative Model Training completed: {model_artifact}")
            return model_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    def start_content_based_model_training(self, data_ingestion_artifact: DataIngestionArtifact) -> ContentBasedModelArtifact:
        """
        Train the content-based filtering model.

        Args:
            data_ingestion_artifact: Output of ``start_data_ingestion``.

        Returns:
            ContentBasedModelArtifact: The value returned by
            ``ContentBasedModelTrainer.initiate_model_trainer()``.

        Raises:
            AnimeRecommendorException: Wraps any error raised by the stage.
        """
        try:
            logging.info("Initiating Content-Based Model Training...")
            stage_config = ContentBasedModelConfig(self.training_pipeline_config)
            trainer = ContentBasedModelTrainer(
                content_based_model_trainer_config=stage_config,
                data_ingestion_artifact=data_ingestion_artifact,
            )
            model_artifact = trainer.initiate_model_trainer()
            logging.info(f"Content-Based Model Training completed: {model_artifact}")
            return model_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    def start_popularity_based_filtering(self, data_ingestion_artifact: DataIngestionArtifact):
        """
        Produce popularity-based recommendations with ``filter_type='popular_animes'``.

        Args:
            data_ingestion_artifact: Output of ``start_data_ingestion``.

        Returns:
            The value returned by ``PopularityBasedRecommendor.initiate_model_trainer``.

        Raises:
            AnimeRecommendorException: Wraps any error raised by the stage.
        """
        try:
            logging.info("Initiating Popularity-Based Filtering...")
            recommender = PopularityBasedRecommendor(
                data_ingestion_artifact=data_ingestion_artifact
            )
            result = recommender.initiate_model_trainer(filter_type='popular_animes')
            logging.info("Popularity-Based Filtering completed.")
            return result
        except Exception as e:
            raise AnimeRecommendorException(e, sys)

    def run_pipeline(self):
        """
        Execute all stages in sequence and log completion.

        Raises:
            AnimeRecommendorException: Wraps any error raised by a stage.
        """
        try:
            # Stage 1: ingestion feeds both transformation and the
            # content-based / popularity stages.
            ingested = self.start_data_ingestion()

            # Stage 2: transformation output is consumed by collaborative training.
            transformed = self.start_data_transformation(ingested)

            # Stages 3-5: results are not used further inside this method.
            self.start_collaborative_model_training(transformed)
            self.start_content_based_model_training(ingested)
            self.start_popularity_based_filtering(ingested)

            logging.info("Training Pipeline executed successfully.")
        except Exception as e:
            raise AnimeRecommendorException(e, sys)
|
144 |
+
|
145 |
+
|
146 |
+
if __name__ == "__main__":
    # Script entry point: run the whole pipeline, log any failure and
    # re-raise it as the project exception.
    try:
        TrainingPipeline().run_pipeline()
    except Exception as exc:
        logging.error(f"Pipeline execution failed: {str(exc)}")
        raise AnimeRecommendorException(exc, sys)
|
anime_recommender/utils/main_utils/utils.py
CHANGED
@@ -1,47 +1,89 @@
|
|
1 |
-
import os
|
2 |
-
import sys
|
3 |
-
import
|
4 |
-
import
|
5 |
-
import
|
6 |
-
from anime_recommender.
|
7 |
-
from anime_recommender.
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import pandas as pd
|
4 |
+
import joblib
|
5 |
+
from anime_recommender.loggers.logging import logging
|
6 |
+
from anime_recommender.exception.exception import AnimeRecommendorException
|
7 |
+
from anime_recommender.constant import *
|
8 |
+
|
9 |
+
def export_data_to_dataframe(dataframe: pd.DataFrame, file_path: str) -> pd.DataFrame:
    """
    Saves a given Pandas DataFrame to a CSV file.

    The parent directory of ``file_path`` is created when it does not
    exist. The CSV is written with a header row and without the index.

    Args:
        dataframe (pd.DataFrame): The DataFrame to be saved.
        file_path (str): The file path where the DataFrame should be stored.

    Returns:
        pd.DataFrame: The same DataFrame that was saved.

    Raises:
        AnimeRecommendorException: Wraps any error raised while creating
            the directory or writing the file.
    """
    try:
        logging.info(f"Saving DataFrame to file: {file_path}")
        dir_path = os.path.dirname(file_path)
        # A bare file name has an empty directory part, and os.makedirs('')
        # raises FileNotFoundError — only create a directory when there is one.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        dataframe.to_csv(file_path, index=False, header=True)
        logging.info(f"DataFrame saved successfully to {file_path}.")
        return dataframe
    except Exception as e:
        logging.error(f"Error saving DataFrame to {file_path}: {e}")
        raise AnimeRecommendorException(e, sys) from e
|
30 |
+
|
31 |
+
def load_csv_data(file_path: str) -> pd.DataFrame:
    """
    Read a CSV file from disk and return its contents as a DataFrame.

    Args:
        file_path (str): Location of the CSV file to read.

    Returns:
        pd.DataFrame: The table produced by ``pd.read_csv(file_path)``.

    Raises:
        AnimeRecommendorException: Wraps any error raised while reading.
    """
    try:
        logging.info(f"Loading CSV data from file: {file_path}")
        loaded_frame = pd.read_csv(file_path)
        logging.info("CSV file loaded successfully.")
        return loaded_frame
    except Exception as read_error:
        logging.error(f"Error loading CSV file {file_path}: {read_error}")
        raise AnimeRecommendorException(read_error, sys) from read_error
|
49 |
+
|
50 |
+
def save_model(model: object, file_path: str) -> None:
    """
    Saves a machine learning model to a file using joblib.

    The parent directory of ``file_path`` is created when it does not exist.

    Args:
        model (object): The model object to be saved.
        file_path (str): The file path where the model should be stored.

    Raises:
        AnimeRecommendorException: Wraps any error raised while creating
            the directory or writing the file.
    """
    try:
        logging.info("Entered the save_model method.")
        dir_path = os.path.dirname(file_path)
        # A bare file name has an empty directory part, and os.makedirs('')
        # raises FileNotFoundError — only create a directory when there is one.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        with open(file_path, "wb") as file_obj:
            joblib.dump(model, file_obj)
        logging.info(f"Model saved successfully to {file_path}.")
    except Exception as e:
        logging.error(f"Error saving model to {file_path}: {e}")
        raise AnimeRecommendorException(e, sys) from e
|
67 |
+
|
68 |
+
def load_object(file_path: str) -> object:
    """
    Loads a model or object from a file using joblib.

    Args:
        file_path (str): The file path of the saved model.

    Returns:
        object: The loaded model.

    Raises:
        AnimeRecommendorException: Wraps any error raised while loading,
            including the FileNotFoundError raised when ``file_path`` does
            not exist.
    """
    try:
        logging.info(f"Attempting to load object from {file_path}")
        if not os.path.exists(file_path):
            error_msg = f"The file: {file_path} does not exist."
            logging.error(error_msg)
            # Specific exception type; callers still receive the wrapper
            # raised in the handler below.
            raise FileNotFoundError(error_msg)
        with open(file_path, "rb") as file_obj:
            loaded = joblib.load(file_obj)
        # Report success only after joblib has actually returned the object.
        logging.info("Object loaded successfully.")
        return loaded
    except Exception as e:
        logging.error(f"Error loading object from {file_path}: {e}")
        raise AnimeRecommendorException(e, sys) from e
|
app.py
CHANGED
@@ -1,233 +1,233 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import streamlit as st
|
3 |
-
from anime_recommender.model_trainer.content_based_modelling import ContentBasedRecommender
|
4 |
-
from anime_recommender.model_trainer.collaborative_modelling import CollaborativeAnimeRecommender
|
5 |
-
from anime_recommender.model_trainer.top_anime_filtering import PopularityBasedFiltering
|
6 |
-
import joblib
|
7 |
-
from anime_recommender.constant import *
|
8 |
-
from huggingface_hub import hf_hub_download
|
9 |
-
from datasets import load_dataset
|
10 |
-
|
11 |
-
st.set_page_config(page_title="Anime Recommendation System", layout="wide")
|
12 |
-
|
13 |
-
if "anime_data" not in st.session_state or "anime_user_ratings" not in st.session_state:
|
14 |
-
# Load datasets from Hugging Face (assuming no splits)
|
15 |
-
animedataset = load_dataset(ANIME_FILE_PATH, split=None)
|
16 |
-
mergeddataset = load_dataset(ANIMEUSERRATINGS_FILE_PATH, split=None)
|
17 |
-
|
18 |
-
# Convert the dataset to Pandas DataFrame
|
19 |
-
st.session_state.anime_data = pd.DataFrame(animedataset["train"])
|
20 |
-
st.session_state.anime_user_ratings = pd.DataFrame(mergeddataset["train"])
|
21 |
-
|
22 |
-
# Load models only once
|
23 |
-
if "models_loaded" not in st.session_state:
|
24 |
-
st.session_state.models_loaded = {}
|
25 |
-
# Load models
|
26 |
-
st.session_state.models_loaded["cosine_similarity_model"] = hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME)
|
27 |
-
st.session_state.models_loaded["item_based_knn_model_path"] = hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME)
|
28 |
-
st.session_state.models_loaded["user_based_knn_model_path"] = hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME)
|
29 |
-
st.session_state.models_loaded["svd_model_path"] = hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_SVD_TRAINED_MODEL_NAME)
|
30 |
-
|
31 |
-
# Load the models using joblib
|
32 |
-
with open(st.session_state.models_loaded["item_based_knn_model_path"], "rb") as f:
|
33 |
-
st.session_state.models_loaded["item_based_knn_model"] = joblib.load(f)
|
34 |
-
|
35 |
-
with open(st.session_state.models_loaded["user_based_knn_model_path"], "rb") as f:
|
36 |
-
st.session_state.models_loaded["user_based_knn_model"] = joblib.load(f)
|
37 |
-
|
38 |
-
with open(st.session_state.models_loaded["svd_model_path"], "rb") as f:
|
39 |
-
st.session_state.models_loaded["svd_model"] = joblib.load(f)
|
40 |
-
|
41 |
-
print("Models loaded successfully!")
|
42 |
-
|
43 |
-
# Access the data from session state
|
44 |
-
anime_data = st.session_state.anime_data
|
45 |
-
anime_user_ratings = st.session_state.anime_user_ratings
|
46 |
-
|
47 |
-
# # Display dataset info
|
48 |
-
# st.write("Anime Data:")
|
49 |
-
# st.dataframe(anime_data.head())
|
50 |
-
|
51 |
-
# st.write("Anime User Ratings Data:")
|
52 |
-
# st.dataframe(anime_user_ratings.head())
|
53 |
-
|
54 |
-
# Access the models from session state
|
55 |
-
cosine_similarity_model_path = hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME)
|
56 |
-
item_based_knn_model = st.session_state.models_loaded["item_based_knn_model"]
|
57 |
-
user_based_knn_model = st.session_state.models_loaded["user_based_knn_model"]
|
58 |
-
svd_model = st.session_state.models_loaded["svd_model"]
|
59 |
-
print("Models loaded successfully!")
|
60 |
-
|
61 |
-
# Streamlit UI
|
62 |
-
app_selector = st.sidebar.radio(
|
63 |
-
"Select App", ("Content-Based Recommender", "Collaborative Recommender", "Top Anime Recommender")
|
64 |
-
)
|
65 |
-
|
66 |
-
if app_selector == "Content-Based Recommender":
|
67 |
-
st.title("Content-Based Recommendation System")
|
68 |
-
try:
|
69 |
-
|
70 |
-
anime_list = anime_data["name"].tolist()
|
71 |
-
anime_name = st.selectbox("Pick an anime..unlock similar anime recommendations..", anime_list)
|
72 |
-
|
73 |
-
# Set number of recommendations
|
74 |
-
max_recommendations = min(len(anime_data), 100)
|
75 |
-
n_recommendations = st.slider("Number of Recommendations", 1, max_recommendations, 10)
|
76 |
-
|
77 |
-
# Inject custom CSS for anime name font size
|
78 |
-
st.markdown(
|
79 |
-
"""
|
80 |
-
<style>
|
81 |
-
.anime-title {
|
82 |
-
font-size: 14px !important;
|
83 |
-
font-weight: bold;
|
84 |
-
text-align: center;
|
85 |
-
margin-top: 5px;
|
86 |
-
}
|
87 |
-
</style>
|
88 |
-
""",
|
89 |
-
unsafe_allow_html=True,
|
90 |
-
)
|
91 |
-
# Get Recommendations
|
92 |
-
if st.button("Get Recommendations"):
|
93 |
-
try:
|
94 |
-
recommender = ContentBasedRecommender(anime_data)
|
95 |
-
recommendations = recommender.get_rec_cosine(anime_name, n_recommendations=n_recommendations,model_path=cosine_similarity_model_path)
|
96 |
-
|
97 |
-
if isinstance(recommendations, str):
|
98 |
-
st.warning(recommendations)
|
99 |
-
elif recommendations.empty:
|
100 |
-
st.warning("No recommendations found.")
|
101 |
-
else:
|
102 |
-
st.write(f"Here are the Content-based Recommendations for {anime_name}:")
|
103 |
-
cols = st.columns(5)
|
104 |
-
for i, row in enumerate(recommendations.iterrows()):
|
105 |
-
col = cols[i % 5]
|
106 |
-
with col:
|
107 |
-
st.image(row[1]['Image URL'], use_container_width=True)
|
108 |
-
st.markdown(
|
109 |
-
f"<div class='anime-title'>{row[1]['Anime name']}</div>",
|
110 |
-
unsafe_allow_html=True,
|
111 |
-
)
|
112 |
-
st.caption(f"Genres: {row[1]['Genres']} | Rating: {row[1]['Rating']}")
|
113 |
-
except Exception as e:
|
114 |
-
st.error(f"Unexpected error: {str(e)}")
|
115 |
-
|
116 |
-
except Exception as e:
|
117 |
-
st.error(f"Unexpected error: {str(e)}")
|
118 |
-
|
119 |
-
elif app_selector == "Collaborative Recommender":
|
120 |
-
st.title("Collaborative Recommender System")
|
121 |
-
|
122 |
-
try:
|
123 |
-
# Sidebar for choosing the collaborative filtering method
|
124 |
-
collaborative_method = st.sidebar.selectbox(
|
125 |
-
"Choose a collaborative filtering method:",
|
126 |
-
["SVD Collaborative Filtering", "User-Based Collaborative Filtering", "Anime-Based KNN Collaborative Filtering"]
|
127 |
-
)
|
128 |
-
|
129 |
-
# User input
|
130 |
-
if collaborative_method == "SVD Collaborative Filtering" or collaborative_method == "User-Based Collaborative Filtering":
|
131 |
-
user_ids = anime_user_ratings['user_id'].unique()
|
132 |
-
user_id = st.selectbox("Choose a user, and we'll show you animes they'd recommend", user_ids)
|
133 |
-
n_recommendations = st.slider("Number of Recommendations:", min_value=1, max_value=50, value=10)
|
134 |
-
elif collaborative_method == "Anime-Based KNN Collaborative Filtering":
|
135 |
-
anime_list = anime_user_ratings["name"].dropna().unique().tolist()
|
136 |
-
anime_name = st.selectbox("Pick an anime, and we'll suggest more titles you'll love", anime_list)
|
137 |
-
n_recommendations = st.slider("Number of Recommendations:", min_value=1, max_value=50, value=10)
|
138 |
-
|
139 |
-
# Get recommendations
|
140 |
-
if st.button("Get Recommendations"):
|
141 |
-
# Load the recommender
|
142 |
-
recommender = CollaborativeAnimeRecommender(anime_user_ratings)
|
143 |
-
if collaborative_method == "SVD Collaborative Filtering":
|
144 |
-
recommendations = recommender.get_svd_recommendations(user_id, n=n_recommendations, svd_model=svd_model)
|
145 |
-
elif collaborative_method == "User-Based Collaborative Filtering":
|
146 |
-
recommendations = recommender.get_user_based_recommendations(user_id, n_recommendations=n_recommendations, knn_user_model=user_based_knn_model)
|
147 |
-
elif collaborative_method == "Anime-Based KNN Collaborative Filtering":
|
148 |
-
if anime_name:
|
149 |
-
recommendations = recommender.get_item_based_recommendations(anime_name, n_recommendations=n_recommendations, knn_item_model=item_based_knn_model)
|
150 |
-
else:
|
151 |
-
st.error("Invalid Anime Name. Please enter a valid anime title.")
|
152 |
-
|
153 |
-
if isinstance(recommendations, pd.DataFrame) and not recommendations.empty:
|
154 |
-
if len(recommendations) < n_recommendations:
|
155 |
-
st.warning(f"Oops...Only {len(recommendations)} recommendations available, fewer than the requested {n_recommendations}.")
|
156 |
-
st.write(f"Here are the Collaborative Recommendations:")
|
157 |
-
cols = st.columns(5)
|
158 |
-
for i, row in enumerate(recommendations.iterrows()):
|
159 |
-
col = cols[i % 5]
|
160 |
-
with col:
|
161 |
-
st.image(row[1]['Image URL'], use_container_width=True)
|
162 |
-
st.markdown(
|
163 |
-
f"<div class='anime-title'>{row[1]['Anime Name']}</div>",
|
164 |
-
unsafe_allow_html=True,
|
165 |
-
)
|
166 |
-
st.caption(f"Genres: {row[1]['Genres']} | Rating: {row[1]['Rating']}")
|
167 |
-
else:
|
168 |
-
st.error("No recommendations found.")
|
169 |
-
except Exception as e:
|
170 |
-
st.error(f"An error occurred: {e}")
|
171 |
-
|
172 |
-
|
173 |
-
elif app_selector == "Top Anime Recommender":
|
174 |
-
st.title("Top Anime Recommender System")
|
175 |
-
|
176 |
-
try:
|
177 |
-
# Sidebar for choosing the popularity-based filtering method
|
178 |
-
popularity_method = st.sidebar.selectbox(
|
179 |
-
"Choose a Popularity-Based Filtering method:",
|
180 |
-
[
|
181 |
-
"Popular Animes",
|
182 |
-
"Top Ranked Animes",
|
183 |
-
"Overall Top Rated Animes",
|
184 |
-
"Favorite Animes",
|
185 |
-
"Top Animes by Members",
|
186 |
-
"Popular Anime Among Members",
|
187 |
-
"Top Average Rated Animes",
|
188 |
-
]
|
189 |
-
)
|
190 |
-
|
191 |
-
n_recommendations = st.slider("Number of Recommendations:", min_value=1, max_value=500, value=10)
|
192 |
-
|
193 |
-
if st.button("Get Recommendations"):
|
194 |
-
# Load the popularity-based recommender
|
195 |
-
recommender = PopularityBasedFiltering(anime_data)
|
196 |
-
|
197 |
-
# Get recommendations based on selected method
|
198 |
-
if popularity_method == "Popular Animes":
|
199 |
-
recommendations = recommender.popular_animes(n=n_recommendations)
|
200 |
-
elif popularity_method == "Top Ranked Animes":
|
201 |
-
recommendations = recommender.top_ranked_animes(n=n_recommendations)
|
202 |
-
elif popularity_method == "Overall Top Rated Animes":
|
203 |
-
recommendations = recommender.overall_top_rated_animes(n=n_recommendations)
|
204 |
-
elif popularity_method == "Favorite Animes":
|
205 |
-
recommendations = recommender.favorite_animes(n=n_recommendations)
|
206 |
-
elif popularity_method == "Top Animes by Members":
|
207 |
-
recommendations = recommender.top_animes_members(n=n_recommendations)
|
208 |
-
elif popularity_method == "Popular Anime Among Members":
|
209 |
-
recommendations = recommender.popular_anime_among_members(n=n_recommendations)
|
210 |
-
elif popularity_method == "Top Average Rated Animes":
|
211 |
-
recommendations = recommender.top_avg_rated(n=n_recommendations)
|
212 |
-
else:
|
213 |
-
st.error("Invalid selection. Please choose a valid method.")
|
214 |
-
recommendations = None
|
215 |
-
|
216 |
-
# Display recommendations
|
217 |
-
if isinstance(recommendations, pd.DataFrame) and not recommendations.empty:
|
218 |
-
st.write(f"Here are the {popularity_method}:")
|
219 |
-
cols = st.columns(5)
|
220 |
-
for i, row in recommendations.iterrows():
|
221 |
-
col = cols[i % 5]
|
222 |
-
with col:
|
223 |
-
st.image(row['Image URL'], use_container_width=True)
|
224 |
-
st.markdown(
|
225 |
-
f"<div class='anime-title'>{row['Anime name']}</div>",
|
226 |
-
unsafe_allow_html=True,
|
227 |
-
)
|
228 |
-
st.caption(f"Genres: {row['Genres']} | Rating: {row['Rating']}")
|
229 |
-
else:
|
230 |
-
st.error("No recommendations found.")
|
231 |
-
except Exception as e:
|
232 |
-
st.error(f"An error occurred: {e}")
|
233 |
-
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import streamlit as st
|
3 |
+
from anime_recommender.model_trainer.content_based_modelling import ContentBasedRecommender
|
4 |
+
from anime_recommender.model_trainer.collaborative_modelling import CollaborativeAnimeRecommender
|
5 |
+
from anime_recommender.model_trainer.top_anime_filtering import PopularityBasedFiltering
|
6 |
+
import joblib
|
7 |
+
from anime_recommender.constant import *
|
8 |
+
from huggingface_hub import hf_hub_download
|
9 |
+
from datasets import load_dataset
|
10 |
+
|
11 |
+
st.set_page_config(page_title="Anime Recommendation System", layout="wide")

# Streamlit re-executes this script on every widget interaction, so the
# datasets are kept in session state and fetched only once per session.
if "anime_data" not in st.session_state or "anime_user_ratings" not in st.session_state:
    # Loaded with split=None and indexed by "train" below; assumes both
    # Hugging Face datasets expose a single "train" split.
    animedataset = load_dataset(ANIME_FILE_PATH, split=None)
    mergeddataset = load_dataset(ANIMEUSERRATINGS_FILE_PATH, split=None)

    # Convert the datasets to pandas DataFrames.
    st.session_state.anime_data = pd.DataFrame(animedataset["train"])
    st.session_state.anime_user_ratings = pd.DataFrame(mergeddataset["train"])

# Download and deserialize the models only once per session.
if "models_loaded" not in st.session_state:
    # Build the registry in a local dict and publish it only after every
    # artifact is ready. Publishing an empty dict first would make a failed
    # download look like a completed load on the next rerun.
    loaded = {
        # Only the file path is stored for the cosine-similarity model; it is
        # handed to the content-based recommender as ``model_path``.
        "cosine_similarity_model": hf_hub_download(
            MODELS_FILEPATH, MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME
        ),
    }
    for key, filename in (
        ("item_based_knn_model", MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME),
        ("user_based_knn_model", MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME),
        ("svd_model", MODEL_TRAINER_SVD_TRAINED_MODEL_NAME),
    ):
        model_path = hf_hub_download(MODELS_FILEPATH, filename)
        loaded[f"{key}_path"] = model_path
        # NOTE(security): joblib.load unpickles the file and can execute
        # arbitrary code; MODELS_FILEPATH must point to a trusted repository.
        with open(model_path, "rb") as f:
            loaded[key] = joblib.load(f)

    st.session_state.models_loaded = loaded
    print("Models loaded successfully!")

# Access the data from session state.
anime_data = st.session_state.anime_data
anime_user_ratings = st.session_state.anime_user_ratings

# Access the models from session state. The cosine model path is reused from
# the cached registry instead of calling hf_hub_download again on every rerun.
models = st.session_state.models_loaded
cosine_similarity_model_path = models["cosine_similarity_model"]
item_based_knn_model = models["item_based_knn_model"]
user_based_knn_model = models["user_based_knn_model"]
svd_model = models["svd_model"]
|
60 |
+
|
61 |
+
# Streamlit UI
def _render_recommendation_grid(recommendations, name_column, n_columns=5):
    """Render one card per recommendation row in a grid ``n_columns`` wide.

    Each card shows the poster (``Image URL``), the title taken from
    ``name_column`` and a caption with ``Genres`` and ``Rating``.

    Args:
        recommendations: DataFrame holding the columns named above.
        name_column: Column that stores the anime title; the recommenders do
            not agree on its spelling ('Anime name' vs 'Anime Name').
        n_columns: Number of cards per grid row.
    """
    import html  # local import keeps this block independent of the file header

    cols = st.columns(n_columns)
    # Count rows positionally: the DataFrame index is a label and need not be
    # 0..n-1, so it must not decide which grid column a card lands in.
    for position, (_, row) in enumerate(recommendations.iterrows()):
        with cols[position % n_columns]:
            st.image(row['Image URL'], use_container_width=True)
            # Titles come from the dataset and are injected as raw HTML, so
            # escape them before interpolation.
            st.markdown(
                f"<div class='anime-title'>{html.escape(str(row[name_column]))}</div>",
                unsafe_allow_html=True,
            )
            st.caption(f"Genres: {row['Genres']} | Rating: {row['Rating']}")


app_selector = st.sidebar.radio(
    "Select App", ("Content-Based Recommender", "Collaborative Recommender", "Top Anime Recommender")
)

# Inject the title style once for every tab; all three grids use the
# 'anime-title' class.
st.markdown(
    """
    <style>
    .anime-title {
        font-size: 14px !important;
        font-weight: bold;
        text-align: center;
        margin-top: 5px;
    }
    </style>
    """,
    unsafe_allow_html=True,
)

if app_selector == "Content-Based Recommender":
    st.title("Content-Based Recommendation System")
    try:
        anime_list = anime_data["name"].tolist()
        anime_name = st.selectbox("Pick an anime..unlock similar anime recommendations..", anime_list)

        # Set number of recommendations; the default is clamped so a small
        # dataset cannot make it exceed the slider maximum.
        max_recommendations = min(len(anime_data), 100)
        n_recommendations = st.slider(
            "Number of Recommendations", 1, max_recommendations, min(10, max_recommendations)
        )

        # Get recommendations
        if st.button("Get Recommendations"):
            recommender = ContentBasedRecommender(anime_data)
            recommendations = recommender.get_rec_cosine(
                anime_name,
                n_recommendations=n_recommendations,
                model_path=cosine_similarity_model_path,
            )

            # A string result is shown to the user as a warning message
            # instead of a table.
            if isinstance(recommendations, str):
                st.warning(recommendations)
            elif recommendations.empty:
                st.warning("No recommendations found.")
            else:
                st.write(f"Here are the Content-based Recommendations for {anime_name}:")
                _render_recommendation_grid(recommendations, 'Anime name')

    except Exception as e:
        st.error(f"Unexpected error: {str(e)}")

elif app_selector == "Collaborative Recommender":
    st.title("Collaborative Recommender System")

    try:
        # Sidebar for choosing the collaborative filtering method
        collaborative_method = st.sidebar.selectbox(
            "Choose a collaborative filtering method:",
            ["SVD Collaborative Filtering", "User-Based Collaborative Filtering", "Anime-Based KNN Collaborative Filtering"]
        )

        # User input: the two user-driven methods need a user id, the
        # item-based method needs an anime title.
        user_id = None
        anime_name = None
        if collaborative_method in ("SVD Collaborative Filtering", "User-Based Collaborative Filtering"):
            user_ids = anime_user_ratings['user_id'].unique()
            user_id = st.selectbox("Choose a user, and we'll show you animes they'd recommend", user_ids)
        elif collaborative_method == "Anime-Based KNN Collaborative Filtering":
            anime_list = anime_user_ratings["name"].dropna().unique().tolist()
            anime_name = st.selectbox("Pick an anime, and we'll suggest more titles you'll love", anime_list)
        n_recommendations = st.slider("Number of Recommendations:", min_value=1, max_value=50, value=10)

        # Get recommendations
        if st.button("Get Recommendations"):
            if collaborative_method == "Anime-Based KNN Collaborative Filtering" and not anime_name:
                # Nothing to look up; report it once and skip rendering.
                st.error("Invalid Anime Name. Please enter a valid anime title.")
            else:
                # Load the recommender
                recommender = CollaborativeAnimeRecommender(anime_user_ratings)
                if collaborative_method == "SVD Collaborative Filtering":
                    recommendations = recommender.get_svd_recommendations(
                        user_id, n=n_recommendations, svd_model=svd_model
                    )
                elif collaborative_method == "User-Based Collaborative Filtering":
                    recommendations = recommender.get_user_based_recommendations(
                        user_id, n_recommendations=n_recommendations, knn_user_model=user_based_knn_model
                    )
                else:
                    recommendations = recommender.get_item_based_recommendations(
                        anime_name, n_recommendations=n_recommendations, knn_item_model=item_based_knn_model
                    )

                if isinstance(recommendations, pd.DataFrame) and not recommendations.empty:
                    if len(recommendations) < n_recommendations:
                        st.warning(f"Oops...Only {len(recommendations)} recommendations available, fewer than the requested {n_recommendations}.")
                    st.write("Here are the Collaborative Recommendations:")
                    _render_recommendation_grid(recommendations, 'Anime Name')
                else:
                    st.error("No recommendations found.")
    except Exception as e:
        st.error(f"An error occurred: {e}")

elif app_selector == "Top Anime Recommender":
    st.title("Top Anime Recommender System")

    try:
        # Sidebar label -> name of the PopularityBasedFiltering method to call.
        popularity_methods = {
            "Popular Animes": "popular_animes",
            "Top Ranked Animes": "top_ranked_animes",
            "Overall Top Rated Animes": "overall_top_rated_animes",
            "Favorite Animes": "favorite_animes",
            "Top Animes by Members": "top_animes_members",
            "Popular Anime Among Members": "popular_anime_among_members",
            "Top Average Rated Animes": "top_avg_rated",
        }

        # Sidebar for choosing the popularity-based filtering method
        popularity_method = st.sidebar.selectbox(
            "Choose a Popularity-Based Filtering method:",
            list(popularity_methods),
        )

        n_recommendations = st.slider("Number of Recommendations:", min_value=1, max_value=500, value=10)

        if st.button("Get Recommendations"):
            # Load the popularity-based recommender
            recommender = PopularityBasedFiltering(anime_data)

            # Get recommendations based on selected method
            method_name = popularity_methods.get(popularity_method)
            if method_name is None:
                st.error("Invalid selection. Please choose a valid method.")
                recommendations = None
            else:
                recommendations = getattr(recommender, method_name)(n=n_recommendations)

            # Display recommendations
            if isinstance(recommendations, pd.DataFrame) and not recommendations.empty:
                st.write(f"Here are the {popularity_method}:")
                _render_recommendation_grid(recommendations, 'Anime name')
            else:
                st.error("No recommendations found.")
    except Exception as e:
        st.error(f"An error occurred: {e}")
|
233 |
+
|
notebooks/EDA.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/final_ARS.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
CHANGED
@@ -6,4 +6,9 @@ transformers
|
|
6 |
huggingface_hub
|
7 |
datasets
|
8 |
scikit-surprise
|
|
|
|
|
|
|
|
|
|
|
9 |
# -e .
|
|
|
6 |
huggingface_hub
|
7 |
datasets
|
8 |
scikit-surprise
|
9 |
+
# wordcloud
|
10 |
+
# seaborn
|
11 |
+
# matplotlib
|
12 |
+
# squarify
|
13 |
+
# tensorflow
|
14 |
# -e .
|
run_pipeline.py
CHANGED
@@ -1,53 +1,53 @@
|
|
1 |
-
import sys
|
2 |
-
from anime_recommender.loggers.logging import logging
|
3 |
-
from anime_recommender.exception.exception import AnimeRecommendorException
|
4 |
-
from anime_recommender.
|
5 |
-
from anime_recommender.entity.config_entity import TrainingPipelineConfig,DataIngestionConfig,DataTransformationConfig,CollaborativeModelConfig,ContentBasedModelConfig
|
6 |
-
from anime_recommender.
|
7 |
-
from anime_recommender.
|
8 |
-
from anime_recommender.
|
9 |
-
from anime_recommender.
|
10 |
-
|
11 |
-
|
12 |
-
if __name__ == "__main__":
|
13 |
-
try:
|
14 |
-
training_pipeline_config = TrainingPipelineConfig()
|
15 |
-
data_ingestion_config = DataIngestionConfig(training_pipeline_config)
|
16 |
-
data_ingestion = DataIngestion(data_ingestion_config)
|
17 |
-
logging.info("Initiating Data Ingestion.")
|
18 |
-
data_ingestion_artifact = data_ingestion.ingest_data()
|
19 |
-
logging.info(f"Data ingestion completed.")
|
20 |
-
print(data_ingestion_artifact)
|
21 |
-
|
22 |
-
# Data Transformation
|
23 |
-
data_transformation_config = DataTransformationConfig(training_pipeline_config)
|
24 |
-
data_transformation = DataTransformation(data_ingestion_artifact,data_transformation_config)
|
25 |
-
logging.info("Initiating Data Transformation.")
|
26 |
-
data_transformation_artifact = data_transformation.initiate_data_transformation()
|
27 |
-
logging.info("Data Transformation Completed.")
|
28 |
-
print(data_transformation_artifact)
|
29 |
-
|
30 |
-
# Collaborative Model Training
|
31 |
-
collaborative_model_trainer_config = CollaborativeModelConfig(training_pipeline_config)
|
32 |
-
collaborative_model_trainer = CollaborativeModelTrainer(collaborative_model_trainer_config= collaborative_model_trainer_config,data_transformation_artifact=data_transformation_artifact)
|
33 |
-
logging.info("Initiating Collaborative Model training.")
|
34 |
-
collaborative_model_trainer_artifact = collaborative_model_trainer.initiate_model_trainer(model_type='user_knn')
|
35 |
-
logging.info("Collaborative Model training completed.")
|
36 |
-
print(collaborative_model_trainer_artifact)
|
37 |
-
|
38 |
-
# Content Based Model Training
|
39 |
-
content_based_model_trainer_config = ContentBasedModelConfig(training_pipeline_config)
|
40 |
-
content_based_model_trainer = ContentBasedModelTrainer(content_based_model_trainer_config=content_based_model_trainer_config,data_ingestion_artifact=data_ingestion_artifact)
|
41 |
-
logging.info("Initiating Content Based Model training.")
|
42 |
-
content_based_model_trainer_artifact = content_based_model_trainer.initiate_model_trainer()
|
43 |
-
logging.info("Content Based Model training completed.")
|
44 |
-
print(content_based_model_trainer_artifact)
|
45 |
-
|
46 |
-
# Popularity Based Filtering
|
47 |
-
logging.info("Initiating Popularity based filtering.")
|
48 |
-
filtering = PopularityBasedRecommendor(data_ingestion_artifact=data_ingestion_artifact)
|
49 |
-
popularity_recommendations = filtering.initiate_model_trainer(filter_type='top_avg_rated')
|
50 |
-
logging.info("Popularity based filtering completed.")
|
51 |
-
|
52 |
-
except Exception as e:
|
53 |
raise AnimeRecommendorException(e, sys)
|
|
|
1 |
+
import sys
|
2 |
+
from anime_recommender.loggers.logging import logging
|
3 |
+
from anime_recommender.exception.exception import AnimeRecommendorException
|
4 |
+
from anime_recommender.components.data_ingestion import DataIngestion
|
5 |
+
from anime_recommender.entity.config_entity import TrainingPipelineConfig,DataIngestionConfig,DataTransformationConfig,CollaborativeModelConfig,ContentBasedModelConfig
|
6 |
+
from anime_recommender.components.data_transformation import DataTransformation
|
7 |
+
from anime_recommender.components.collaborative_recommender import CollaborativeModelTrainer
|
8 |
+
from anime_recommender.components.content_based_recommender import ContentBasedModelTrainer
|
9 |
+
from anime_recommender.components.top_anime_recommenders import PopularityBasedRecommendor
|
10 |
+
|
11 |
+
|
12 |
+
if __name__ == "__main__":
    # Runs the pipeline stages in order: ingestion -> transformation ->
    # collaborative model -> content-based model -> popularity filtering.
    # Each stage receives the artifact produced by an earlier stage.
    try:
        training_pipeline_config = TrainingPipelineConfig()

        # Data Ingestion
        data_ingestion_config = DataIngestionConfig(training_pipeline_config)
        data_ingestion = DataIngestion(data_ingestion_config)
        logging.info("Initiating Data Ingestion.")
        data_ingestion_artifact = data_ingestion.ingest_data()
        logging.info("Data ingestion completed.")
        print(data_ingestion_artifact)

        # Data Transformation
        data_transformation_config = DataTransformationConfig(training_pipeline_config)
        data_transformation = DataTransformation(data_ingestion_artifact, data_transformation_config)
        logging.info("Initiating Data Transformation.")
        data_transformation_artifact = data_transformation.initiate_data_transformation()
        logging.info("Data Transformation Completed.")
        print(data_transformation_artifact)

        # Collaborative Model Training
        # NOTE(review): only model_type='user_knn' is requested here; presumably
        # the other collaborative models need separate runs - confirm.
        collaborative_model_trainer_config = CollaborativeModelConfig(training_pipeline_config)
        collaborative_model_trainer = CollaborativeModelTrainer(
            collaborative_model_trainer_config=collaborative_model_trainer_config,
            data_transformation_artifact=data_transformation_artifact,
        )
        logging.info("Initiating Collaborative Model training.")
        collaborative_model_trainer_artifact = collaborative_model_trainer.initiate_model_trainer(model_type='user_knn')
        logging.info("Collaborative Model training completed.")
        print(collaborative_model_trainer_artifact)

        # Content Based Model Training
        content_based_model_trainer_config = ContentBasedModelConfig(training_pipeline_config)
        content_based_model_trainer = ContentBasedModelTrainer(
            content_based_model_trainer_config=content_based_model_trainer_config,
            data_ingestion_artifact=data_ingestion_artifact,
        )
        logging.info("Initiating Content Based Model training.")
        content_based_model_trainer_artifact = content_based_model_trainer.initiate_model_trainer()
        logging.info("Content Based Model training completed.")
        print(content_based_model_trainer_artifact)

        # Popularity Based Filtering (the return value is not used here)
        logging.info("Initiating Popularity based filtering.")
        filtering = PopularityBasedRecommendor(data_ingestion_artifact=data_ingestion_artifact)
        filtering.initiate_model_trainer(filter_type='top_avg_rated')
        logging.info("Popularity based filtering completed.")

    except Exception as e:
        # Wrap any failure in the project exception; chain the original
        # explicitly so its traceback is reported as the cause.
        raise AnimeRecommendorException(e, sys) from e
|
setup.py
CHANGED
@@ -1,29 +1,29 @@
|
|
1 |
-
from setuptools import find_packages, setup
|
2 |
-
from typing import List
|
3 |
-
|
4 |
-
def get_requirements() -> List[str] :
|
5 |
-
"""
|
6 |
-
This function returns the list of requirements
|
7 |
-
"""
|
8 |
-
requirements_lst:List[str] = []
|
9 |
-
try:
|
10 |
-
with open("requirements.txt", "r") as file:
|
11 |
-
lines = file.readlines()
|
12 |
-
for line in lines:
|
13 |
-
requirement = line.strip()
|
14 |
-
if requirement and requirement != "-e .":
|
15 |
-
requirements_lst.append(requirement)
|
16 |
-
except FileNotFoundError:
|
17 |
-
print("requirements.txt file not found")
|
18 |
-
return requirements_lst
|
19 |
-
|
20 |
-
print(get_requirements())
|
21 |
-
|
22 |
-
setup(
|
23 |
-
name="AnimeRecommendationSystem",
|
24 |
-
version= "0.0.1",
|
25 |
-
author= "Krishnaveni Ponna",
|
26 |
-
author_email= "[email protected]",
|
27 |
-
packages= find_packages(),
|
28 |
-
install_requires = get_requirements()
|
29 |
)
|
|
|
1 |
+
from setuptools import find_packages, setup
|
2 |
+
from typing import List
|
3 |
+
|
4 |
+
def get_requirements() -> List[str]:
    """
    Return the installable requirements listed in ``requirements.txt``.

    The file is read from the current working directory. Blank lines,
    comments (whole-line or trailing ``#`` comments) and pip option lines
    such as ``-e .`` are skipped, because none of them is a valid
    ``install_requires`` entry.

    Returns:
        The requirement specifiers in file order; an empty list when
        ``requirements.txt`` does not exist.
    """
    import re  # local import keeps this block independent of the file header

    # Same comment rule pip applies: '#' at line start or after whitespace.
    comment_pattern = re.compile(r"(^|\s+)#.*$")

    requirements_lst: List[str] = []
    try:
        with open("requirements.txt", "r", encoding="utf-8") as file:
            for line in file:
                requirement = comment_pattern.sub("", line).strip()
                # Lines starting with '-' are pip options (-e, -r, --index-url, ...).
                if requirement and not requirement.startswith("-"):
                    requirements_lst.append(requirement)
    except FileNotFoundError:
        print("requirements.txt file not found")
    return requirements_lst
|
19 |
+
|
20 |
+
# Package metadata; dependencies are read from requirements.txt once, at
# build/install time. The former debug print of the requirement list was
# dropped because it ran on every build and parsed the file a second time.
setup(
    name="AnimeRecommendationSystem",
    version="0.0.1",
    author="Krishnaveni Ponna",
    author_email="[email protected]",
    packages=find_packages(),
    install_requires=get_requirements(),
)
|