krishnaveni76 committed on
Commit
b4f6ffc
·
1 Parent(s): c2f0782

Updated all files

Browse files
.gitignore CHANGED
@@ -1,5 +1,5 @@
1
- ars/
2
- .env
3
- Artifacts/
4
- logs/
5
  __pycache__/
 
1
+ anime/
2
+ .env
3
+ Artifacts/
4
+ logs/
5
  __pycache__/
Dockerfile CHANGED
@@ -1,17 +1,17 @@
1
- # Use the official Python image as a base
2
- FROM python:3.10-slim-buster
3
-
4
- # Set the working directory in the container
5
- WORKDIR /app
6
-
7
- # Copy the app files into the container
8
- COPY . .
9
-
10
- # Install required packages
11
- RUN pip install -r requirements.txt
12
-
13
- # Expose the port that Streamlit uses
14
- EXPOSE 8501
15
-
16
- # Run the Streamlit app
17
  CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
1
+ # Use the official Python image as a base
2
+ FROM python:3.10-slim-buster
3
+
4
+ # Set the working directory in the container
5
+ WORKDIR /app
6
+
7
+ # Copy the app files into the container
8
+ COPY . .
9
+
10
+ # Install required packages
11
+ RUN pip install -r requirements.txt
12
+
13
+ # Expose the port that Streamlit uses
14
+ EXPOSE 8501
15
+
16
+ # Run the Streamlit app
17
  CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
anime_recommender/{source → components}/__init__.py RENAMED
File without changes
anime_recommender/{source → components}/collaborative_recommender.py RENAMED
@@ -1,73 +1,91 @@
1
- import sys
2
- from anime_recommender.loggers.logging import logging
3
- from anime_recommender.exception.exception import AnimeRecommendorException
4
- from anime_recommender.entity.config_entity import CollaborativeModelConfig
5
- from anime_recommender.entity.artifact_entity import DataTransformationArtifact, CollaborativeModelArtifact
6
- from anime_recommender.utils.main_utils.utils import load_csv_data, save_model, load_object
7
- from anime_recommender.model_trainer.collaborative_modelling import CollaborativeAnimeRecommender
8
-
9
- class CollaborativeModelTrainer:
10
- """
11
- Class to train the model, track metrics, and save the trained model.
12
- """
13
- def __init__(self, collaborative_model_trainer_config: CollaborativeModelConfig, data_transformation_artifact: DataTransformationArtifact):
14
- try:
15
- self.collaborative_model_trainer_config = collaborative_model_trainer_config
16
- self.data_transformation_artifact = data_transformation_artifact
17
- except Exception as e:
18
- raise AnimeRecommendorException(e, sys)
19
-
20
- def initiate_model_trainer(self, model_type: str) -> CollaborativeModelArtifact:
21
- try:
22
- logging.info("Loading transformed data...")
23
- df = load_csv_data(self.data_transformation_artifact.merged_file_path)
24
- recommender = CollaborativeAnimeRecommender(df)
25
- # recommender.print_unique_user_ids()
26
- if model_type == 'svd':
27
- logging.info("Training and saving SVD model...")
28
- recommender.train_svd()
29
- save_model(recommender.svd, self.collaborative_model_trainer_config.svd_trained_model_file_path)
30
-
31
- logging.info("Loading pre-trained SVD model...")
32
- svd_model = load_object(self.collaborative_model_trainer_config.svd_trained_model_file_path)
33
- svd_recommendations = recommender.get_svd_recommendations(user_id=436, n=10, svd_model=svd_model)
34
- logging.info(f"SVD recommendations: {svd_recommendations}")
35
- return CollaborativeModelArtifact(
36
- svd_file_path=self.collaborative_model_trainer_config.svd_trained_model_file_path
37
- )
38
-
39
- elif model_type == 'item_knn':
40
- logging.info("Training and saving KNN item-based model...")
41
- recommender.train_knn_item_based()
42
- save_model(recommender.knn_item_based, self.collaborative_model_trainer_config.item_knn_trained_model_file_path)
43
-
44
- logging.info("Loading pre-trained item-based KNN model...")
45
- item_knn_model = load_object(self.collaborative_model_trainer_config.item_knn_trained_model_file_path)
46
- item_based_recommendations = recommender.get_item_based_recommendations(
47
- anime_name='One Piece', n_recommendations=10, knn_item_model=item_knn_model
48
- )
49
- logging.info(f"Item Based recommendations: {item_based_recommendations}")
50
- return CollaborativeModelArtifact(
51
- item_based_knn_file_path=self.collaborative_model_trainer_config.item_knn_trained_model_file_path
52
- )
53
-
54
- elif model_type == 'user_knn':
55
- logging.info("Training and saving KNN user-based model...")
56
- recommender.train_knn_user_based()
57
- save_model(recommender.knn_user_based, self.collaborative_model_trainer_config.user_knn_trained_model_file_path)
58
-
59
- logging.info("Loading pre-trained user-based KNN model...")
60
- user_knn_model = load_object(self.collaborative_model_trainer_config.user_knn_trained_model_file_path)
61
- user_based_recommendations = recommender.get_user_based_recommendations(
62
- user_id=817, n_recommendations=10, knn_user_model=user_knn_model
63
- )
64
- logging.info(f"User Based recommendations: {user_based_recommendations}")
65
- return CollaborativeModelArtifact(
66
- user_based_knn_file_path=self.collaborative_model_trainer_config.user_knn_trained_model_file_path
67
- )
68
-
69
- else:
70
- raise ValueError("Invalid model_type. Choose from 'svd', 'item_knn', or 'user_knn'.")
71
-
72
- except Exception as e:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  raise AnimeRecommendorException(f"Error in CollaborativeModelTrainer: {str(e)}", sys)
 
1
+ import sys
2
+ from anime_recommender.loggers.logging import logging
3
+ from anime_recommender.exception.exception import AnimeRecommendorException
4
+ from anime_recommender.entity.config_entity import CollaborativeModelConfig
5
+ from anime_recommender.entity.artifact_entity import DataTransformationArtifact, CollaborativeModelArtifact
6
+ from anime_recommender.utils.main_utils.utils import load_csv_data, save_model, load_object
7
+ from anime_recommender.model_trainer.collaborative_modelling import CollaborativeAnimeRecommender
8
+
9
+ class CollaborativeModelTrainer:
10
+ """
11
+ Trains and saves collaborative filtering recommendation models.
12
+
13
+ This class supports three types of models:
14
+ - Singular Value Decomposition (SVD)
15
+ - Item-based K-Nearest Neighbors (KNN)
16
+ - User-based K-Nearest Neighbors (KNN)
17
+ """
18
+ def __init__(self, collaborative_model_trainer_config: CollaborativeModelConfig, data_transformation_artifact: DataTransformationArtifact):
19
+ """
20
+ Initializes the CollaborativeModelTrainer with configuration and transformed data.
21
+
22
+ Args:
23
+ collaborative_model_trainer_config (CollaborativeModelConfig): Configuration settings for model training.
24
+ data_transformation_artifact (DataTransformationArtifact): Data artifact containing the preprocessed dataset path.
25
+ """
26
+ try:
27
+ self.collaborative_model_trainer_config = collaborative_model_trainer_config
28
+ self.data_transformation_artifact = data_transformation_artifact
29
+ except Exception as e:
30
+ raise AnimeRecommendorException(e, sys)
31
+
32
+ def initiate_model_trainer(self, model_type: str) -> CollaborativeModelArtifact:
33
+ """
34
+ Trains and saves the specified collaborative filtering model.
35
+ Args:
36
+ model_type (str): The type of model to train.
37
+ Choices: 'svd', 'item_knn', 'user_knn'.
38
+ Returns:
39
+ CollaborativeModelArtifact: Object containing the file path of the trained model.
40
+ """
41
+ try:
42
+ logging.info("Loading transformed data...")
43
+ df = load_csv_data(self.data_transformation_artifact.merged_file_path)
44
+ recommender = CollaborativeAnimeRecommender(df)
45
+
46
+ if model_type == 'svd':
47
+ logging.info("Training and saving SVD model...")
48
+ recommender.train_svd()
49
+ save_model(recommender.svd, self.collaborative_model_trainer_config.svd_trained_model_file_path)
50
+
51
+ logging.info("Loading pre-trained SVD model...")
52
+ svd_model = load_object(self.collaborative_model_trainer_config.svd_trained_model_file_path)
53
+ svd_recommendations = recommender.get_svd_recommendations(user_id=436, n=10, svd_model=svd_model)
54
+ logging.info(f"SVD recommendations: {svd_recommendations}")
55
+ return CollaborativeModelArtifact(
56
+ svd_file_path=self.collaborative_model_trainer_config.svd_trained_model_file_path
57
+ )
58
+
59
+ elif model_type == 'item_knn':
60
+ logging.info("Training and saving KNN item-based model...")
61
+ recommender.train_knn_item_based()
62
+ save_model(recommender.knn_item_based, self.collaborative_model_trainer_config.item_knn_trained_model_file_path)
63
+
64
+ logging.info("Loading pre-trained item-based KNN model...")
65
+ item_knn_model = load_object(self.collaborative_model_trainer_config.item_knn_trained_model_file_path)
66
+ item_based_recommendations = recommender.get_item_based_recommendations(
67
+ anime_name='One Piece', n_recommendations=10, knn_item_model=item_knn_model
68
+ )
69
+ logging.info(f"Item Based recommendations: {item_based_recommendations}")
70
+ return CollaborativeModelArtifact(
71
+ item_based_knn_file_path=self.collaborative_model_trainer_config.item_knn_trained_model_file_path
72
+ )
73
+
74
+ elif model_type == 'user_knn':
75
+ logging.info("Training and saving KNN user-based model...")
76
+ recommender.train_knn_user_based()
77
+ save_model(recommender.knn_user_based, self.collaborative_model_trainer_config.user_knn_trained_model_file_path)
78
+
79
+ logging.info("Loading pre-trained user-based KNN model...")
80
+ user_knn_model = load_object(self.collaborative_model_trainer_config.user_knn_trained_model_file_path)
81
+ user_based_recommendations = recommender.get_user_based_recommendations(
82
+ user_id=817, n_recommendations=10, knn_user_model=user_knn_model
83
+ )
84
+ logging.info(f"User Based recommendations: {user_based_recommendations}")
85
+ return CollaborativeModelArtifact(
86
+ user_based_knn_file_path=self.collaborative_model_trainer_config.user_knn_trained_model_file_path
87
+ )
88
+ else:
89
+ raise ValueError("Invalid model_type. Choose from 'svd', 'item_knn', or 'user_knn'.")
90
+ except Exception as e:
91
  raise AnimeRecommendorException(f"Error in CollaborativeModelTrainer: {str(e)}", sys)
anime_recommender/{source → components}/content_based_recommender.py RENAMED
@@ -1,43 +1,58 @@
1
- import sys
2
- from anime_recommender.loggers.logging import logging
3
- from anime_recommender.exception.exception import AnimeRecommendorException
4
- from anime_recommender.entity.config_entity import ContentBasedModelConfig
5
- from anime_recommender.entity.artifact_entity import ContentBasedModelArtifact, DataIngestionArtifact
6
- from anime_recommender.utils.main_utils.utils import load_csv_data
7
- from anime_recommender.model_trainer.content_based_modelling import ContentBasedRecommender
8
- from anime_recommender.constant import *
9
-
10
- class ContentBasedModelTrainer:
11
- """Class to train the model, track metrics, and save the trained model."""
12
-
13
- def __init__(self, content_based_model_trainer_config: ContentBasedModelConfig, data_ingestion_artifact: DataIngestionArtifact):
14
- try:
15
- self.content_based_model_trainer_config = content_based_model_trainer_config
16
- self.data_ingestion_artifact = data_ingestion_artifact
17
- except Exception as e:
18
- raise AnimeRecommendorException(e, sys)
19
-
20
- def initiate_model_trainer(self) -> ContentBasedModelArtifact:
21
- try:
22
- logging.info("Loading ingested data...")
23
- df = load_csv_data(self.data_ingestion_artifact.feature_store_anime_file_path)
24
- logging.info("Training ContentBasedRecommender model...")
25
-
26
- # Initialize and train the model
27
- recommender = ContentBasedRecommender(df=df )
28
-
29
- # Save the model (TF-IDF and cosine similarity matrix)
30
- recommender.save_model(self.content_based_model_trainer_config.cosine_similarity_model_file_path)
31
- logging.info("Model saved successfully.")
32
-
33
- logging.info("Loading saved model to get recommendations...")
34
- cosine_recommendations = recommender.get_rec_cosine(title="One Piece", model_path=self.content_based_model_trainer_config.cosine_similarity_model_file_path, n_recommendations=10)
35
- logging.info(f"Cosine similarity recommendations: {cosine_recommendations}")
36
-
37
- # Return artifact with saved model path
38
- content_model_trainer_artifact = ContentBasedModelArtifact(
39
- cosine_similarity_model_file_path=self.content_based_model_trainer_config.cosine_similarity_model_file_path
40
- )
41
- return content_model_trainer_artifact
42
- except Exception as e:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  raise AnimeRecommendorException(f"Error in ContentBasedModelTrainer: {str(e)}", sys)
 
1
+ import sys
2
+ from anime_recommender.loggers.logging import logging
3
+ from anime_recommender.exception.exception import AnimeRecommendorException
4
+ from anime_recommender.entity.config_entity import ContentBasedModelConfig
5
+ from anime_recommender.entity.artifact_entity import ContentBasedModelArtifact, DataIngestionArtifact
6
+ from anime_recommender.utils.main_utils.utils import load_csv_data
7
+ from anime_recommender.model_trainer.content_based_modelling import ContentBasedRecommender
8
+ from anime_recommender.constant import *
9
+
10
+ class ContentBasedModelTrainer:
11
+ """
12
+ A class responsible for training and saving the content-based recommender model.
13
+ """
14
+ def __init__(self, content_based_model_trainer_config: ContentBasedModelConfig, data_ingestion_artifact: DataIngestionArtifact):
15
+ """
16
+ Initializes the ContentBasedModelTrainer with configuration and data ingestion artifacts.
17
+
18
+ Args:
19
+ content_based_model_trainer_config (ContentBasedModelConfig): Configuration settings for model training.
20
+ data_ingestion_artifact (DataIngestionArtifact): Data ingestion artifact containing the dataset path.
21
+ """
22
+ try:
23
+ self.content_based_model_trainer_config = content_based_model_trainer_config
24
+ self.data_ingestion_artifact = data_ingestion_artifact
25
+ except Exception as e:
26
+ raise AnimeRecommendorException(e, sys)
27
+
28
+ def initiate_model_trainer(self) -> ContentBasedModelArtifact:
29
+ """
30
+ Trains the content-based recommender model using TF-IDF and cosine similarity,
31
+ saves the trained model, and retrieves recommendations.
32
+
33
+ Returns:
34
+ ContentBasedModelArtifact: Object containing the path to the saved content-based model.
35
+ """
36
+ try:
37
+ logging.info("Loading ingested data...")
38
+ df = load_csv_data(self.data_ingestion_artifact.feature_store_anime_file_path)
39
+ logging.info("Training ContentBasedRecommender model...")
40
+
41
+ # Initialize and train the model
42
+ recommender = ContentBasedRecommender(df=df )
43
+
44
+ # Save the model (TF-IDF and cosine similarity matrix)
45
+ recommender.save_model(self.content_based_model_trainer_config.cosine_similarity_model_file_path)
46
+ logging.info("Model saved successfully.")
47
+
48
+ logging.info("Loading saved model to get recommendations...")
49
+ cosine_recommendations = recommender.get_rec_cosine(title="One Piece", model_path=self.content_based_model_trainer_config.cosine_similarity_model_file_path, n_recommendations=10)
50
+ logging.info(f"Cosine similarity recommendations: {cosine_recommendations}")
51
+
52
+ # Return artifact with saved model path
53
+ content_model_trainer_artifact = ContentBasedModelArtifact(
54
+ cosine_similarity_model_file_path=self.content_based_model_trainer_config.cosine_similarity_model_file_path
55
+ )
56
+ return content_model_trainer_artifact
57
+ except Exception as e:
58
  raise AnimeRecommendorException(f"Error in ContentBasedModelTrainer: {str(e)}", sys)
anime_recommender/{source → components}/data_ingestion.py RENAMED
@@ -1,58 +1,82 @@
1
- import os
2
- import sys
3
- import pandas as pd
4
- from datasets import load_dataset
5
- from anime_recommender.loggers.logging import logging
6
- from anime_recommender.exception.exception import AnimeRecommendorException
7
- from anime_recommender.entity.config_entity import DataIngestionConfig
8
- from anime_recommender.entity.artifact_entity import DataIngestionArtifact
9
- from anime_recommender.utils.main_utils.utils import export_data_to_dataframe
10
-
11
- class DataIngestion:
12
- def __init__(self, data_ingestion_config: DataIngestionConfig):
13
- try:
14
- self.data_ingestion_config = data_ingestion_config
15
- except Exception as e:
16
- raise AnimeRecommendorException(e, sys)
17
-
18
- def fetch_data_from_huggingface(self, dataset_path: str, split: str = None) -> pd.DataFrame:
19
- try:
20
- logging.info(f"Fetching data from Hugging Face dataset: {dataset_path}")
21
- # Load dataset from Hugging Face
22
- dataset = load_dataset(dataset_path, split=split)
23
-
24
- # Convert dataset to pandas DataFrame
25
- df = pd.DataFrame(dataset['train'])
26
-
27
- # Log some information about the data
28
- logging.info(f"Shape of the dataframe: {df.shape}")
29
- logging.info(f"Column names: {df.columns}")
30
- logging.info(f"Preview of the DataFrame:\n{df.head()}")
31
- logging.info("Data fetched successfully from Hugging Face.")
32
-
33
- return df
34
-
35
- except Exception as e:
36
- logging.error(f"An error occurred while fetching data: {str(e)}")
37
- raise AnimeRecommendorException(e, sys)
38
-
39
- def ingest_data(self) -> DataIngestionArtifact:
40
- try:
41
- # Load anime and rating data from Hugging Face datasets
42
- anime_df = self.fetch_data_from_huggingface(self.data_ingestion_config.anime_filepath)
43
- rating_df = self.fetch_data_from_huggingface(self.data_ingestion_config.rating_filepath)
44
-
45
- # Export data to DataFrame
46
- export_data_to_dataframe(anime_df, file_path=self.data_ingestion_config.feature_store_anime_file_path)
47
- export_data_to_dataframe(rating_df, file_path=self.data_ingestion_config.feature_store_userrating_file_path)
48
-
49
- # Create artifact to store data ingestion info
50
- dataingestionartifact = DataIngestionArtifact(
51
- feature_store_anime_file_path=self.data_ingestion_config.feature_store_anime_file_path,
52
- feature_store_userrating_file_path=self.data_ingestion_config.feature_store_userrating_file_path
53
- )
54
-
55
- return dataingestionartifact
56
-
57
- except Exception as e:
58
- raise AnimeRecommendorException(e, sys)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import pandas as pd
3
+ from datasets import load_dataset
4
+ from anime_recommender.loggers.logging import logging
5
+ from anime_recommender.exception.exception import AnimeRecommendorException
6
+ from anime_recommender.entity.config_entity import DataIngestionConfig
7
+ from anime_recommender.entity.artifact_entity import DataIngestionArtifact
8
+ from anime_recommender.utils.main_utils.utils import export_data_to_dataframe
9
+
10
+ class DataIngestion:
11
+ """
12
+ A class responsible for data ingestion in the anime recommender system.
13
+
14
+ This class fetches data from Hugging Face datasets, converts it into pandas DataFrame format,
15
+ and exports the processed data to storage for further use in the pipeline.
16
+ """
17
+ def __init__(self, data_ingestion_config: DataIngestionConfig):
18
+ """
19
+ Initializes the DataIngestion class with the provided configuration.
20
+
21
+ Args:
22
+ data_ingestion_config (DataIngestionConfig): Configuration settings for data ingestion.
23
+ """
24
+ try:
25
+ self.data_ingestion_config = data_ingestion_config
26
+ except Exception as e:
27
+ raise AnimeRecommendorException(e, sys)
28
+
29
+ def fetch_data_from_huggingface(self, dataset_path: str, split: str = None) -> pd.DataFrame:
30
+ """
31
+ Fetches a dataset from Hugging Face and converts it into a pandas DataFrame.
32
+ Args:
33
+ dataset_path (str): The path to the Hugging Face dataset.
34
+ split (str, optional): The dataset split to be fetched (e.g., 'train', 'test'). Defaults to None.
35
+
36
+ Returns:
37
+ pd.DataFrame: The dataset converted into a pandas DataFrame.
38
+ """
39
+ try:
40
+ logging.info(f"Fetching data from Hugging Face dataset: {dataset_path}")
41
+ # Load dataset from Hugging Face
42
+ dataset = load_dataset(dataset_path, split=split)
43
+
44
+ # Convert dataset to pandas DataFrame
45
+ df = pd.DataFrame(dataset['train'])
46
+
47
+ # Log some information about the data
48
+ logging.info(f"Shape of the dataframe: {df.shape}")
49
+ logging.info(f"Column names: {df.columns}")
50
+ logging.info(f"Preview of the DataFrame:\n{df.head()}")
51
+ logging.info("Data fetched successfully from Hugging Face.")
52
+
53
+ return df
54
+
55
+ except Exception as e:
56
+ logging.error(f"An error occurred while fetching data: {str(e)}")
57
+ raise AnimeRecommendorException(e, sys)
58
+
59
+ def ingest_data(self) -> DataIngestionArtifact:
60
+ """
61
+ Orchestrates the data ingestion process, fetching datasets and saving them to the feature store.
62
+ Returns:
63
+ DataIngestionArtifact: An artifact containing paths to the ingested datasets.
64
+ """
65
+ try:
66
+ # Load anime and rating data from Hugging Face datasets
67
+ anime_df = self.fetch_data_from_huggingface(self.data_ingestion_config.anime_filepath)
68
+ rating_df = self.fetch_data_from_huggingface(self.data_ingestion_config.rating_filepath)
69
+
70
+ # Export data to DataFrame
71
+ export_data_to_dataframe(anime_df, file_path=self.data_ingestion_config.feature_store_anime_file_path)
72
+ export_data_to_dataframe(rating_df, file_path=self.data_ingestion_config.feature_store_userrating_file_path)
73
+
74
+ # Create artifact to store data ingestion info
75
+ dataingestionartifact = DataIngestionArtifact(
76
+ feature_store_anime_file_path=self.data_ingestion_config.feature_store_anime_file_path,
77
+ feature_store_userrating_file_path=self.data_ingestion_config.feature_store_userrating_file_path
78
+ )
79
+ return dataingestionartifact
80
+
81
+ except Exception as e:
82
+ raise AnimeRecommendorException(e, sys)
anime_recommender/{source → components}/data_transformation.py RENAMED
@@ -1,115 +1,109 @@
1
- import sys
2
- import numpy as np
3
- import pandas as pd
4
-
5
- from anime_recommender.loggers.logging import logging
6
- from anime_recommender.exception.exception import AnimeRecommendorException
7
- from anime_recommender.utils.main_utils.utils import export_data_to_dataframe
8
- from anime_recommender.constant import *
9
- from anime_recommender.entity.config_entity import DataTransformationConfig
10
- from anime_recommender.entity.artifact_entity import DataIngestionArtifact,DataTransformationArtifact
11
-
12
- class DataTransformation:
13
- """
14
- Class for handling data transformation for energy generation models.
15
- """
16
- def __init__(self,data_ingestion_artifact:DataIngestionArtifact,data_transformation_config:DataTransformationConfig):
17
- """
18
- Initializes the DataTransformation class with the given data ingestion and configuration artifacts.
19
-
20
- Args:
21
- data_ingestion_artifact (DataIngestionArtifact): The artifact containing ingested data paths.
22
- data_transformation_config (DataTransformationConfig): Configuration object for data transformation.
23
- """
24
- try:
25
- self.data_ingestion_artifact = data_ingestion_artifact
26
- self.data_transformation_config = data_transformation_config
27
- except Exception as e:
28
- raise AnimeRecommendorException(e,sys)
29
-
30
- @staticmethod
31
- def read_data(file_path)->pd.DataFrame:
32
- """
33
- Reads data from a CSV file.
34
-
35
- Args:
36
- file_path (str): Path to the CSV file.
37
-
38
- Returns:
39
- pd.DataFrame: The DataFrame containing the data from the CSV file.
40
- """
41
- try:
42
- return pd.read_csv(file_path)
43
- except Exception as e:
44
- raise AnimeRecommendorException(e,sys)
45
-
46
- @staticmethod
47
- def merge_data(anime_df: pd.DataFrame, rating_df: pd.DataFrame) -> pd.DataFrame:
48
- """
49
- Merges the anime and rating DataFrames on 'anime_id'.
50
-
51
- Args:
52
- anime_df (pd.DataFrame): DataFrame containing anime information.
53
- rating_df (pd.DataFrame): DataFrame containing user rating information.
54
-
55
- Returns:
56
- pd.DataFrame: Merged DataFrame on 'anime_id'.
57
- """
58
- try:
59
- merged_df = pd.merge(rating_df, anime_df, on="anime_id", how="inner")
60
- logging.info(f"Shape of the Merged dataframe:{merged_df.shape}")
61
- logging.info(f"Column names: {merged_df.columns}")
62
- return merged_df
63
- except Exception as e:
64
- raise AnimeRecommendorException(e, sys)
65
-
66
- @staticmethod
67
- def clean_filter_data(merged_df: pd.DataFrame) -> pd.DataFrame:
68
- """
69
- Cleans the merged DataFrame by replacing 'UNKNOWN' with NaN, filling NaN values with median and also filters the data.
70
-
71
- Args:
72
- merged_df (pd.DataFrame): Merged DataFrame to clean and filter.
73
-
74
- Returns:
75
- pd.DataFrame: Cleaned and Filtered DataFrame with NaN values handled.
76
- """
77
- try:
78
- merged_df['average_rating'].replace('UNKNOWN', np.nan)
79
- merged_df['average_rating'] = pd.to_numeric(merged_df['average_rating'], errors='coerce')
80
- merged_df['average_rating'].fillna(merged_df['average_rating'].median())
81
- merged_df = merged_df[merged_df['average_rating'] > 6]
82
- cols_to_drop = [ 'username', 'overview', 'type', 'episodes', 'producers',
83
- 'licensors', 'studios', 'source', 'rank', 'popularity',
84
- 'favorites', 'scored by', 'members' ]
85
- cleaned_df = merged_df.copy()
86
- cleaned_df.drop(columns=cols_to_drop, inplace=True)
87
- logging.info(f"Shape of the Merged dataframe:{cleaned_df.shape}")
88
- logging.info(f"Column names: {cleaned_df.columns}")
89
- logging.info(f"Preview of the merged DataFrame:\n{cleaned_df.head()}")
90
- return cleaned_df
91
- except Exception as e:
92
- raise AnimeRecommendorException(e, sys)
93
-
94
- def initiate_data_transformation(self)->DataTransformationArtifact:
95
- """
96
- Initiates the data transformation process by reading, transforming, and saving the data.
97
-
98
- Returns:
99
- DataTransformationArtifact: The artifact containing paths to the transformed data.
100
- """
101
- logging.info("Entering initiate_data_transformation method of DataTransformation class.")
102
- try:
103
- anime_df = DataTransformation.read_data(self.data_ingestion_artifact.feature_store_anime_file_path)
104
- rating_df = DataTransformation.read_data(self.data_ingestion_artifact.feature_store_userrating_file_path)
105
- merged_df = DataTransformation.merge_data(anime_df, rating_df)
106
- transformed_df = DataTransformation.clean_filter_data(merged_df)
107
-
108
- export_data_to_dataframe(transformed_df, self.data_transformation_config.merged_file_path)
109
- data_transformation_artifact = DataTransformationArtifact(
110
- merged_file_path=self.data_transformation_config.merged_file_path
111
- )
112
-
113
- return data_transformation_artifact
114
- except Exception as e:
115
  raise AnimeRecommendorException(e,sys)
 
1
+ import sys
2
+ import numpy as np
3
+ import pandas as pd
4
+ from anime_recommender.loggers.logging import logging
5
+ from anime_recommender.exception.exception import AnimeRecommendorException
6
+ from anime_recommender.utils.main_utils.utils import export_data_to_dataframe
7
+ from anime_recommender.constant import *
8
+ from anime_recommender.entity.config_entity import DataTransformationConfig
9
+ from anime_recommender.entity.artifact_entity import DataIngestionArtifact,DataTransformationArtifact
10
+
11
+ class DataTransformation:
12
+ """
13
+ Class for handling data transformation for energy generation models.
14
+ """
15
+ def __init__(self,data_ingestion_artifact:DataIngestionArtifact,data_transformation_config:DataTransformationConfig):
16
+ """
17
+ Initializes the DataTransformation class with the given data ingestion and configuration artifacts.
18
+ Args:
19
+ data_ingestion_artifact (DataIngestionArtifact): The artifact containing ingested data paths.
20
+ data_transformation_config (DataTransformationConfig): Configuration object for data transformation.
21
+ """
22
+ try:
23
+ self.data_ingestion_artifact = data_ingestion_artifact
24
+ self.data_transformation_config = data_transformation_config
25
+ except Exception as e:
26
+ raise AnimeRecommendorException(e,sys)
27
+
28
+ @staticmethod
29
+ def read_data(file_path)->pd.DataFrame:
30
+ """
31
+ Reads data from a CSV file.
32
+ Args:
33
+ file_path (str): Path to the CSV file.
34
+ Returns:
35
+ pd.DataFrame: The DataFrame containing the data from the CSV file.
36
+ """
37
+ try:
38
+ return pd.read_csv(file_path)
39
+ except Exception as e:
40
+ raise AnimeRecommendorException(e,sys)
41
+
42
+ @staticmethod
43
+ def merge_data(anime_df: pd.DataFrame, rating_df: pd.DataFrame) -> pd.DataFrame:
44
+ """
45
+ Merges the anime and rating DataFrames on 'anime_id'.
46
+ Args:
47
+ anime_df (pd.DataFrame): DataFrame containing anime information.
48
+ rating_df (pd.DataFrame): DataFrame containing user rating information.
49
+ Returns:
50
+ pd.DataFrame: Merged DataFrame on 'anime_id'.
51
+ """
52
+ try:
53
+ merged_df = pd.merge(rating_df, anime_df, on="anime_id", how="inner")
54
+ logging.info(f"Shape of the Merged dataframe:{merged_df.shape}")
55
+ logging.info(f"Column names: {merged_df.columns}")
56
+ return merged_df
57
+ except Exception as e:
58
+ raise AnimeRecommendorException(e, sys)
59
+
60
+ @staticmethod
61
+ def clean_filter_data(merged_df: pd.DataFrame) -> pd.DataFrame:
62
+ """
63
+ Cleans the merged DataFrame by replacing 'UNKNOWN' with NaN, filling NaN values with median and also filters the data.
64
+
65
+ Args:
66
+ merged_df (pd.DataFrame): Merged DataFrame to clean and filter.
67
+
68
+ Returns:
69
+ pd.DataFrame: Cleaned and Filtered DataFrame with NaN values handled.
70
+ """
71
+ try:
72
+ merged_df['average_rating'].replace('UNKNOWN', np.nan)
73
+ merged_df['average_rating'] = pd.to_numeric(merged_df['average_rating'], errors='coerce')
74
+ merged_df['average_rating'].fillna(merged_df['average_rating'].median())
75
+ merged_df = merged_df[merged_df['average_rating'] > 6]
76
+ cols_to_drop = [ 'username', 'overview', 'type', 'episodes', 'producers',
77
+ 'licensors', 'studios', 'source', 'rank', 'popularity',
78
+ 'favorites', 'scored by', 'members' ]
79
+ cleaned_df = merged_df.copy()
80
+ cleaned_df.drop(columns=cols_to_drop, inplace=True)
81
+ logging.info(f"Shape of the Merged dataframe:{cleaned_df.shape}")
82
+ logging.info(f"Column names: {cleaned_df.columns}")
83
+ logging.info(f"Preview of the merged DataFrame:\n{cleaned_df.head()}")
84
+ return cleaned_df
85
+ except Exception as e:
86
+ raise AnimeRecommendorException(e, sys)
87
+
88
+ def initiate_data_transformation(self)->DataTransformationArtifact:
89
+ """
90
+ Initiates the data transformation process by reading, transforming, and saving the data.
91
+
92
+ Returns:
93
+ DataTransformationArtifact: The artifact containing paths to the transformed data.
94
+ """
95
+ logging.info("Entering initiate_data_transformation method of DataTransformation class.")
96
+ try:
97
+ anime_df = DataTransformation.read_data(self.data_ingestion_artifact.feature_store_anime_file_path)
98
+ rating_df = DataTransformation.read_data(self.data_ingestion_artifact.feature_store_userrating_file_path)
99
+ merged_df = DataTransformation.merge_data(anime_df, rating_df)
100
+ transformed_df = DataTransformation.clean_filter_data(merged_df)
101
+
102
+ export_data_to_dataframe(transformed_df, self.data_transformation_config.merged_file_path)
103
+ data_transformation_artifact = DataTransformationArtifact(
104
+ merged_file_path=self.data_transformation_config.merged_file_path
105
+ )
106
+
107
+ return data_transformation_artifact
108
+ except Exception as e:
 
 
 
 
 
 
109
  raise AnimeRecommendorException(e,sys)
anime_recommender/{source → components}/top_anime_recommenders.py RENAMED
@@ -1,53 +1,75 @@
1
- import sys
2
- from anime_recommender.exception.exception import AnimeRecommendorException
3
- from anime_recommender.loggers.logging import logging
4
- from anime_recommender.utils.main_utils.utils import load_csv_data
5
- from anime_recommender.entity.artifact_entity import DataIngestionArtifact
6
- from anime_recommender.model_trainer.top_anime_filtering import PopularityBasedFiltering
7
-
8
-
9
- class PopularityBasedRecommendor:
10
-
11
- def __init__(self,data_ingestion_artifact = DataIngestionArtifact):
12
- try:
13
- self.data_ingestion_artifact = data_ingestion_artifact
14
- except Exception as e:
15
- raise AnimeRecommendorException(e,sys)
16
-
17
- def initiate_model_trainer(self,filter_type:str):
18
- try:
19
- logging.info("Loading transformed data...")
20
- df = load_csv_data(self.data_ingestion_artifact.feature_store_anime_file_path)
21
-
22
- recommender = PopularityBasedFiltering(df)
23
-
24
- if filter_type == 'popular_animes':
25
- popular_animes = recommender.popular_animes(n =10)
26
- logging.info(f"Popular Anime recommendations: {popular_animes}")
27
-
28
- elif filter_type == 'top_ranked_animes':
29
- top_ranked_animes = recommender.top_ranked_animes(n =10)
30
- logging.info(f"top_ranked_animes recommendations: {top_ranked_animes}")
31
-
32
- elif filter_type == 'overall_top_rated_animes':
33
- overall_top_rated_animes = recommender.overall_top_rated_animes(n =10)
34
- logging.info(f"overall_top_rated_animes recommendations: {overall_top_rated_animes}")
35
-
36
- elif filter_type == 'favorite_animes':
37
- favorite_animes = recommender.favorite_animes(n =10)
38
- logging.info(f"favorite_animes recommendations: {favorite_animes}")
39
-
40
- elif filter_type == 'top_animes_members':
41
- top_animes_members = recommender.top_animes_members(n = 10)
42
- logging.info(f"top_animes_members recommendations: {top_animes_members}")
43
-
44
- elif filter_type == 'popular_anime_among_members':
45
- popular_anime_among_members = recommender.popular_anime_among_members(n =10)
46
- logging.info(f"popular_anime_among_members recommendations: {popular_anime_among_members}")
47
-
48
- elif filter_type == 'top_avg_rated':
49
- top_avg_rated = recommender.top_avg_rated(n =10)
50
- logging.info(f"top_avg_rated recommendations: {top_avg_rated}")
51
-
52
- except Exception as e:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  raise AnimeRecommendorException(e,sys)
 
1
+ import sys
2
+ from anime_recommender.exception.exception import AnimeRecommendorException
3
+ from anime_recommender.loggers.logging import logging
4
+ from anime_recommender.utils.main_utils.utils import load_csv_data
5
+ from anime_recommender.entity.artifact_entity import DataIngestionArtifact
6
+ from anime_recommender.model_trainer.top_anime_filtering import PopularityBasedFiltering
7
+
8
class PopularityBasedRecommendor:
    """
    Logs popularity-based anime recommendations for a chosen filter type.
    """

    # Filter types accepted by initiate_model_trainer. Each entry is also the
    # name of the PopularityBasedFiltering method that is invoked for it.
    _FILTER_TYPES = (
        'popular_animes',
        'top_ranked_animes',
        'overall_top_rated_animes',
        'favorite_animes',
        'top_animes_members',
        'popular_anime_among_members',
        'top_avg_rated',
    )

    # NOTE(review): the default value below is the DataIngestionArtifact class
    # itself; this was presumably meant to be a type annotation. It is kept
    # unchanged so the constructor signature stays compatible -- confirm and
    # turn it into an annotation once all callers pass an instance.
    def __init__(self, data_ingestion_artifact=DataIngestionArtifact):
        """
        Store the data ingestion artifact used to locate the anime dataset.

        Args:
            data_ingestion_artifact (DataIngestionArtifact): Artifact whose
                feature_store_anime_file_path is read by initiate_model_trainer.
        """
        self.data_ingestion_artifact = data_ingestion_artifact

    def initiate_model_trainer(self, filter_type: str):
        """
        Load the anime dataset and log the top 10 recommendations for
        the requested filter type.

        Args:
            filter_type (str): One of 'popular_animes', 'top_ranked_animes',
                'overall_top_rated_animes', 'favorite_animes',
                'top_animes_members', 'popular_anime_among_members' or
                'top_avg_rated'. Any other value logs a warning and
                produces no recommendations.

        Raises:
            AnimeRecommendorException: Wraps any exception raised while
                loading the data or computing the recommendations.
        """
        try:
            logging.info("Loading transformed data...")
            df = load_csv_data(self.data_ingestion_artifact.feature_store_anime_file_path)

            recommender = PopularityBasedFiltering(df)

            if filter_type not in self._FILTER_TYPES:
                # Previously an unknown filter type fell through silently.
                logging.warning(
                    f"Unknown filter_type '{filter_type}'; expected one of {self._FILTER_TYPES}"
                )
                return

            # The filter type doubles as the filtering method name.
            recommendations = getattr(recommender, filter_type)(n=10)

            # 'popular_animes' has always been logged under a friendlier label.
            label = "Popular Anime" if filter_type == 'popular_animes' else filter_type
            logging.info(f"{label} recommendations: {recommendations}")

        except Exception as e:
            raise AnimeRecommendorException(e, sys)
anime_recommender/constant/__init__.py CHANGED
@@ -1,40 +1,40 @@
1
- """
2
- Defining common constant variables for training pipeline
3
- """
4
- PIPELINE_NAME: str = "AnimeRecommender"
5
- ARTIFACT_DIR: str = "Artifacts"
6
- ANIME_FILE_NAME: str = "Animes.csv"
7
- RATING_FILE_NAME:str = "UserRatings.csv"
8
- MERGED_FILE_NAME:str = "Anime_UserRatings.csv"
9
-
10
- ANIME_FILE_PATH:str = "krishnaveni76/Animes"
11
- RATING_FILE_PATH:str = "krishnaveni76/UserRatings"
12
- ANIMEUSERRATINGS_FILE_PATH:str = "krishnaveni76/Anime_UserRatings"
13
- MODELS_FILEPATH = "krishnaveni76/anime-recommendation-models"
14
-
15
- """
16
- Data Ingestion related constant start with DATA_INGESTION VAR NAME
17
- """
18
- DATA_INGESTION_DIR_NAME: str = "data_ingestion"
19
- DATA_INGESTION_FEATURE_STORE_DIR: str = "feature_store"
20
- DATA_INGESTION_INGESTED_DIR: str = "ingested"
21
-
22
- """
23
- Data Transformation related constant start with DATA_VALIDATION VAR NAME
24
- """
25
- DATA_TRANSFORMATION_DIR:str = "data_transformation"
26
- DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR:str = "transformed"
27
-
28
- """
29
- Model Trainer related constant start with MODEL TRAINER VAR NAME
30
- """
31
- MODEL_TRAINER_DIR_NAME: str = "trained_models"
32
-
33
- MODEL_TRAINER_COL_TRAINED_MODEL_DIR: str = "collaborative_recommenders"
34
- MODEL_TRAINER_SVD_TRAINED_MODEL_NAME: str = "svd.pkl"
35
- MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME: str = "itembasedknn.pkl"
36
- MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME: str = "userbasedknn.pkl"
37
-
38
- MODEL_TRAINER_CON_TRAINED_MODEL_DIR:str = "content_based_recommenders"
39
- MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME:str = "cosine_similarity.pkl"
40
 
 
1
+ """
2
+ Defining common constant variables for training pipeline
3
+ """
4
+ PIPELINE_NAME: str = "AnimeRecommender"
5
+ ARTIFACT_DIR: str = "Artifacts"
6
+ ANIME_FILE_NAME: str = "Animes.csv"
7
+ RATING_FILE_NAME:str = "UserRatings.csv"
8
+ MERGED_FILE_NAME:str = "Anime_UserRatings.csv"
9
+
10
+ ANIME_FILE_PATH:str = "krishnaveni76/Animes"
11
+ RATING_FILE_PATH:str = "krishnaveni76/UserRatings"
12
+ ANIMEUSERRATINGS_FILE_PATH:str = "krishnaveni76/Anime_UserRatings"
13
+ MODELS_FILEPATH = "krishnaveni76/anime-recommendation-models"
14
+
15
+ """
16
+ Data Ingestion related constant start with DATA_INGESTION VAR NAME
17
+ """
18
+ DATA_INGESTION_DIR_NAME: str = "data_ingestion"
19
+ DATA_INGESTION_FEATURE_STORE_DIR: str = "feature_store"
20
+ DATA_INGESTION_INGESTED_DIR: str = "ingested"
21
+
22
+ """
23
+ Data Transformation related constants start with the DATA_TRANSFORMATION prefix
24
+ """
25
+ DATA_TRANSFORMATION_DIR:str = "data_transformation"
26
+ DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR:str = "transformed"
27
+
28
+ """
29
+ Model Trainer related constant start with MODEL TRAINER VAR NAME
30
+ """
31
+ MODEL_TRAINER_DIR_NAME: str = "trained_models"
32
+
33
+ MODEL_TRAINER_COL_TRAINED_MODEL_DIR: str = "collaborative_recommenders"
34
+ MODEL_TRAINER_SVD_TRAINED_MODEL_NAME: str = "svd.pkl"
35
+ MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME: str = "itembasedknn.pkl"
36
+ MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME: str = "userbasedknn.pkl"
37
+
38
+ MODEL_TRAINER_CON_TRAINED_MODEL_DIR:str = "content_based_recommenders"
39
+ MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME:str = "cosine_similarity.pkl"
40
 
anime_recommender/entity/artifact_entity.py CHANGED
@@ -1,21 +1,21 @@
1
- from dataclasses import dataclass
2
- from typing import Optional
3
-
4
- @dataclass
5
- class DataIngestionArtifact:
6
- feature_store_anime_file_path:str
7
- feature_store_userrating_file_path:str
8
-
9
- @dataclass
10
- class DataTransformationArtifact:
11
- merged_file_path:str
12
-
13
- @dataclass
14
- class CollaborativeModelArtifact:
15
- svd_file_path: Optional[str] = None
16
- item_based_knn_file_path: Optional[str] = None
17
- user_based_knn_file_path: Optional[str] = None
18
-
19
- @dataclass
20
- class ContentBasedModelArtifact:
21
  cosine_similarity_model_file_path:str
 
1
+ from dataclasses import dataclass
2
+ from typing import Optional
3
+
4
@dataclass
class DataIngestionArtifact:
    """File paths produced by the data ingestion stage."""

    # Path of the anime file inside the feature store.
    feature_store_anime_file_path: str
    # Path of the user-rating file inside the feature store.
    feature_store_userrating_file_path: str
8
+
9
@dataclass
class DataTransformationArtifact:
    """File path produced by the data transformation stage."""

    # Path of the merged file written by the transformation stage.
    merged_file_path: str
12
+
13
@dataclass
class CollaborativeModelArtifact:
    """Optional file paths of the collaborative models; each defaults to None."""

    # Path of the SVD model file, if any.
    svd_file_path: Optional[str] = None
    # Path of the item-based KNN model file, if any.
    item_based_knn_file_path: Optional[str] = None
    # Path of the user-based KNN model file, if any.
    user_based_knn_file_path: Optional[str] = None
18
+
19
@dataclass
class ContentBasedModelArtifact:
    """File path of the content-based (cosine similarity) model."""

    # Path of the cosine similarity model file.
    cosine_similarity_model_file_path: str
anime_recommender/entity/config_entity.py CHANGED
@@ -1,66 +1,66 @@
1
- import os
2
- from datetime import datetime
3
- from anime_recommender.constant import *
4
-
5
- class TrainingPipelineConfig:
6
- """
7
- Configuration for the training pipeline, including artifact directory and timestamp.
8
- """
9
- def __init__(self, timestamp=datetime.now()):
10
- """
11
- Initialize the configuration with a unique timestamp.
12
- """
13
- timestamp = timestamp.strftime("%m_%d_%Y_%H_%M_%S")
14
- self.pipeline_name = PIPELINE_NAME
15
- self.artifact_dir = os.path.join(ARTIFACT_DIR, timestamp)
16
- self.model_dir=os.path.join("final_model")
17
- self.timestamp: str = timestamp
18
-
19
- class DataIngestionConfig:
20
- """
21
- Configuration for data ingestion, including paths for feature store, train, test, and validation files.
22
- """
23
- def __init__(self, training_pipeline_config: TrainingPipelineConfig):
24
- """
25
- Initialize data ingestion paths and parameters.
26
- """
27
- self.data_ingestion_dir: str = os.path.join(training_pipeline_config.artifact_dir, DATA_INGESTION_DIR_NAME)
28
- self.feature_store_anime_file_path: str = os.path.join(self.data_ingestion_dir, DATA_INGESTION_FEATURE_STORE_DIR, ANIME_FILE_NAME)
29
- self.feature_store_userrating_file_path: str = os.path.join(self.data_ingestion_dir, DATA_INGESTION_FEATURE_STORE_DIR, RATING_FILE_NAME)
30
- self.anime_filepath: str = ANIME_FILE_PATH
31
- self.rating_filepath: str = RATING_FILE_PATH
32
-
33
- class DataTransformationConfig:
34
- """
35
- Configuration for data transformation, including paths for transformed data and preprocessing objects.
36
- """
37
- def __init__(self,training_pipeline_config:TrainingPipelineConfig):
38
- """
39
- Initialize data transformation paths.
40
- """
41
- self.data_transformation_dir:str = os.path.join(training_pipeline_config.artifact_dir,DATA_TRANSFORMATION_DIR)
42
- self.merged_file_path:str = os.path.join(self.data_transformation_dir,DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR,MERGED_FILE_NAME)
43
-
44
- class CollaborativeModelConfig:
45
- """
46
- Configuration for model training, including paths for trained models.
47
- """
48
- def __init__(self,training_pipeline_config:TrainingPipelineConfig):
49
- """
50
- Initialize model trainer paths.
51
- """
52
- self.model_trainer_dir:str = os.path.join(training_pipeline_config.artifact_dir,MODEL_TRAINER_DIR_NAME)
53
- self.svd_trained_model_file_path:str = os.path.join(self.model_trainer_dir,MODEL_TRAINER_COL_TRAINED_MODEL_DIR,MODEL_TRAINER_SVD_TRAINED_MODEL_NAME)
54
- self.user_knn_trained_model_file_path:str = os.path.join(self.model_trainer_dir,MODEL_TRAINER_COL_TRAINED_MODEL_DIR,MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME)
55
- self.item_knn_trained_model_file_path:str = os.path.join(self.model_trainer_dir,MODEL_TRAINER_COL_TRAINED_MODEL_DIR,MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME)
56
-
57
- class ContentBasedModelConfig:
58
- """
59
- Configuration for model training, including paths for trained models.
60
- """
61
- def __init__(self,training_pipeline_config:TrainingPipelineConfig):
62
- """
63
- Initialize model trainer paths.
64
- """
65
- self.model_trainer_dir:str = os.path.join(training_pipeline_config.artifact_dir,MODEL_TRAINER_DIR_NAME)
66
  self.cosine_similarity_model_file_path:str = os.path.join(self.model_trainer_dir,MODEL_TRAINER_CON_TRAINED_MODEL_DIR,MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME)
 
1
+ import os
2
+ from datetime import datetime
3
+ from anime_recommender.constant import *
4
+
5
class TrainingPipelineConfig:
    """
    Configuration for the training pipeline, including artifact directory and timestamp.
    """
    def __init__(self, timestamp=None):
        """
        Initialize the configuration with a timestamp-derived artifact directory.

        Args:
            timestamp (datetime, optional): Moment used to name the artifact
                directory. Defaults to the current time when the config is
                created.
        """
        # A `datetime.now()` default in the signature is evaluated only once,
        # when the function is defined, so every default-constructed config in
        # the same process would share one timestamp. Resolve it per call.
        if timestamp is None:
            timestamp = datetime.now()
        timestamp = timestamp.strftime("%m_%d_%Y_%H_%M_%S")
        self.pipeline_name = PIPELINE_NAME
        self.artifact_dir = os.path.join(ARTIFACT_DIR, timestamp)
        self.model_dir = os.path.join("final_model")
        self.timestamp: str = timestamp
18
+
19
class DataIngestionConfig:
    """
    Paths and source identifiers used by the data ingestion stage.
    """
    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        """
        Derive the ingestion directory and feature-store file paths from the
        pipeline's artifact directory, and record the anime/rating sources.
        """
        ingestion_root = os.path.join(training_pipeline_config.artifact_dir, DATA_INGESTION_DIR_NAME)
        # Both feature-store files live in the same sub-directory.
        feature_store_dir = os.path.join(ingestion_root, DATA_INGESTION_FEATURE_STORE_DIR)

        self.data_ingestion_dir: str = ingestion_root
        self.feature_store_anime_file_path: str = os.path.join(feature_store_dir, ANIME_FILE_NAME)
        self.feature_store_userrating_file_path: str = os.path.join(feature_store_dir, RATING_FILE_NAME)
        self.anime_filepath: str = ANIME_FILE_PATH
        self.rating_filepath: str = RATING_FILE_PATH
32
+
33
class DataTransformationConfig:
    """
    Paths used by the data transformation stage.
    """
    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        """
        Derive the transformation directory and merged file path from the
        pipeline's artifact directory.
        """
        transformation_root = os.path.join(training_pipeline_config.artifact_dir, DATA_TRANSFORMATION_DIR)
        transformed_dir = os.path.join(transformation_root, DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR)

        self.data_transformation_dir: str = transformation_root
        self.merged_file_path: str = os.path.join(transformed_dir, MERGED_FILE_NAME)
43
+
44
class CollaborativeModelConfig:
    """
    Paths of the collaborative model files (SVD, user-based KNN, item-based KNN).
    """
    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        """
        Derive the model trainer directory and the three collaborative model
        file paths from the pipeline's artifact directory.
        """
        trainer_root = os.path.join(training_pipeline_config.artifact_dir, MODEL_TRAINER_DIR_NAME)
        # All collaborative models share one sub-directory.
        collaborative_dir = os.path.join(trainer_root, MODEL_TRAINER_COL_TRAINED_MODEL_DIR)

        self.model_trainer_dir: str = trainer_root
        self.svd_trained_model_file_path: str = os.path.join(collaborative_dir, MODEL_TRAINER_SVD_TRAINED_MODEL_NAME)
        self.user_knn_trained_model_file_path: str = os.path.join(collaborative_dir, MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME)
        self.item_knn_trained_model_file_path: str = os.path.join(collaborative_dir, MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME)
56
+
57
class ContentBasedModelConfig:
    """
    Path of the content-based (cosine similarity) model file.
    """
    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        """
        Derive the model trainer directory and the cosine similarity model
        file path from the pipeline's artifact directory.
        """
        trainer_root = os.path.join(training_pipeline_config.artifact_dir, MODEL_TRAINER_DIR_NAME)
        content_based_dir = os.path.join(trainer_root, MODEL_TRAINER_CON_TRAINED_MODEL_DIR)

        self.model_trainer_dir: str = trainer_root
        self.cosine_similarity_model_file_path: str = os.path.join(content_based_dir, MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME)
anime_recommender/exception/exception.py CHANGED
@@ -1,44 +1,44 @@
1
- import sys
2
-
3
- class AnimeRecommendorException(Exception):
4
- """
5
- Custom exception class for handling errors in the Energy Generation Prediction project.
6
-
7
- This class captures the error message, file name, and line number where an exception occurred.
8
- It is useful for debugging and identifying the source of the error in a structured way.
9
- """
10
- def __init__(self,error_message, error_details:sys):
11
- """
12
- Initialize the EnergyGenerationException instance.
13
-
14
- Args:
15
- error_message (str): The error message describing the exception.
16
- error_details (sys): The sys module, used to extract exception details.
17
-
18
- Attributes:
19
- error_message (str): Stores the original error message.
20
- lineno (int): The line number where the exception occurred.
21
- file_name (str): The file name where the exception occurred.
22
- """
23
- self.error_message = error_message
24
- _,_,exc_tb = error_details.exc_info()
25
-
26
- self.lineno = exc_tb.tb_lineno
27
- self.file_name = exc_tb.tb_frame.f_code.co_filename
28
-
29
- def __str__(self):
30
- """
31
- Return the formatted error message.
32
-
33
- Returns:
34
- str: A string containing the file name, line number, and error message.
35
- """
36
- return "Error occured in python script name [{0}] line number [{1}] error message [{2}]".format(
37
- self.file_name,self.lineno, str(self.error_message))
38
-
39
- if __name__=="__main__":
40
- try:
41
- a = 1/0 # This example will raise a ZeroDivisionError
42
- print("This will not be printed",a)
43
- except Exception as e:
44
  raise AnimeRecommendorException(e,sys)
 
1
+ import sys
2
+
3
class AnimeRecommendorException(Exception):
    """
    Custom exception class for handling errors in the anime recommender project.

    This class captures the error message together with the file name and
    line number read from the traceback of the exception currently being
    handled.
    """
    def __init__(self, error_message, error_details=sys):
        """
        Initialize the AnimeRecommendorException instance.

        Args:
            error_message: The original exception (or message) being wrapped.
            error_details: Object exposing ``exc_info()``. Defaults to the
                ``sys`` module so callers may pass only the error.

        Attributes:
            error_message: Stores the original error.
            lineno (int | None): Line number taken from the active traceback,
                or None when no exception is being handled.
            file_name (str | None): File name taken from the active traceback,
                or None when no exception is being handled.
        """
        super().__init__(error_message)
        self.error_message = error_message
        _, _, exc_tb = error_details.exc_info()

        if exc_tb is None:
            # Constructed outside an `except` block: there is no traceback to
            # inspect, so avoid failing with AttributeError on None.
            self.lineno = None
            self.file_name = None
        else:
            self.lineno = exc_tb.tb_lineno
            self.file_name = exc_tb.tb_frame.f_code.co_filename

    def __str__(self):
        """
        Return the formatted error message.

        Returns:
            str: A string containing the file name, line number, and error message.
        """
        return "Error occured in python script name [{0}] line number [{1}] error message [{2}]".format(
            self.file_name, self.lineno, str(self.error_message))
38
+
39
if __name__ == "__main__":
    # Manual demo: trigger a ZeroDivisionError and re-raise it wrapped in
    # AnimeRecommendorException.
    try:
        quotient = 1 / 0
        print("This will not be printed", quotient)
    except Exception as err:
        raise AnimeRecommendorException(err, sys)
anime_recommender/loggers/logging.py CHANGED
@@ -1,16 +1,16 @@
1
- import os
2
- import logging
3
- from datetime import datetime
4
-
5
- LOGS_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"
6
-
7
- logs_dir = os.path.join(os.getcwd(), "logs")
8
- os.makedirs(logs_dir, exist_ok=True)
9
-
10
- LOGS_FILE_PATH = os.path.join(logs_dir,LOGS_FILE)
11
-
12
- logging.basicConfig(
13
- filename= LOGS_FILE_PATH,
14
- format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
15
- level= logging.INFO,
16
  )
 
1
+ import os
2
+ import logging
3
+ from datetime import datetime
4
+
5
# Log file name is derived from the time this module is first imported.
LOGS_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"

# Logs are written to a "logs" folder under the current working directory.
logs_dir = os.path.join(os.getcwd(), "logs")
os.makedirs(logs_dir, exist_ok=True)

LOGS_FILE_PATH = os.path.join(logs_dir, LOGS_FILE)

# Configure the root logger to write INFO-and-above records to the log file.
logging.basicConfig(
    level=logging.INFO,
    format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
    filename=LOGS_FILE_PATH,
)
anime_recommender/model_trainer/collaborative_modelling.py CHANGED
@@ -1,183 +1,263 @@
1
- import sys
2
- import pandas as pd
3
- from anime_recommender.loggers.logging import logging
4
- from anime_recommender.exception.exception import AnimeRecommendorException
5
-
6
- from surprise import Reader, Dataset, SVD
7
- from surprise.model_selection import cross_validate
8
- from scipy.sparse import csr_matrix
9
- from sklearn.neighbors import NearestNeighbors
10
- from collections import Counter
11
-
12
- class CollaborativeAnimeRecommender:
13
- def __init__(self, df):
14
- self.df = df
15
- self.svd = None
16
- self.knn_item_based = None
17
- self.knn_user_based = None
18
- self.prepare_data()
19
-
20
-
21
- def prepare_data(self):
22
- self.df = self.df.drop_duplicates()
23
- reader = Reader(rating_scale=(1, 10))
24
- self.data = Dataset.load_from_df(self.df[['user_id', 'anime_id', 'rating']], reader)
25
- self.anime_pivot = self.df.pivot_table(index='name', columns='user_id', values='rating').fillna(0)
26
- self.user_pivot = self.df.pivot_table(index='user_id', columns='name', values='rating').fillna(0)
27
-
28
- def train_svd(self):
29
- self.svd = SVD()
30
- cross_validate(self.svd, self.data, cv=5)
31
- trainset = self.data.build_full_trainset()
32
- self.svd.fit(trainset)
33
-
34
- def train_knn_item_based(self):
35
- item_user_matrix = csr_matrix(self.anime_pivot.values)
36
- self.knn_item_based = NearestNeighbors(metric='cosine', algorithm='brute')
37
- self.knn_item_based.fit(item_user_matrix)
38
-
39
- def train_knn_user_based(self):
40
- """Train the KNN model for user-based recommendations."""
41
- user_item_matrix = csr_matrix(self.user_pivot.values)
42
- self.knn_user_based = NearestNeighbors(metric='cosine', algorithm='brute')
43
- self.knn_user_based.fit(user_item_matrix)
44
-
45
- def print_unique_user_ids(self):
46
- """Print unique user IDs from the dataset."""
47
- unique_user_ids = self.df['user_id'].unique()
48
- logging.info(f"Unique User IDs: {unique_user_ids}")
49
- return unique_user_ids
50
-
51
- def get_svd_recommendations(self, user_id, n=10, svd_model=None):
52
- # Use the provided SVD model or the trained self.svd model
53
- svd_model = svd_model or self.svd
54
- if svd_model is None:
55
- raise ValueError("SVD model is not provided or trained.")
56
-
57
- # Ensure user exists in the dataset
58
- if user_id not in self.df['user_id'].unique():
59
- return f"User ID '{user_id}' not found in the dataset."
60
-
61
- # Get unique anime IDs
62
- anime_ids = self.df['anime_id'].unique()
63
-
64
- # Predict ratings for all anime for the given user
65
- predictions = [(anime_id, svd_model.predict(user_id, anime_id).est) for anime_id in anime_ids]
66
- predictions.sort(key=lambda x: x[1], reverse=True)
67
-
68
- # Extract top N anime IDs
69
- recommended_anime_ids = [pred[0] for pred in predictions[:n]]
70
-
71
- # Get details of recommended anime
72
- recommended_anime = self.df[self.df['anime_id'].isin(recommended_anime_ids)].drop_duplicates(subset='anime_id')
73
- logging.info(f"Shape of recommended_anime: {recommended_anime.shape}")
74
- # Limit to N recommendations
75
- recommended_anime = recommended_anime.head(n)
76
-
77
- return pd.DataFrame({
78
- 'Anime Name': recommended_anime['name'].values,
79
- 'Genres': recommended_anime['genres'].values,
80
- 'Image URL': recommended_anime['image url'].values,
81
- 'Rating': recommended_anime['average_rating'].values
82
- })
83
-
84
- def get_item_based_recommendations(self, anime_name, n_recommendations=10, knn_item_model=None):
85
- # Use the provided model or fall back to self.knn_item_based
86
- knn_item_based = knn_item_model or self.knn_item_based
87
- if knn_item_based is None:
88
- raise ValueError("Item-based KNN model is not provided or trained.")
89
-
90
- # Ensure the anime name exists in the pivot table
91
- if anime_name not in self.anime_pivot.index:
92
- return f"Anime title '{anime_name}' not found in the dataset."
93
-
94
- # Get the index of the anime in the pivot table
95
- query_index = self.anime_pivot.index.get_loc(anime_name)
96
-
97
- # Use the KNN model to find similar animes (n_neighbors + 1 to exclude the query itself)
98
- distances, indices = knn_item_based.kneighbors(
99
- self.anime_pivot.iloc[query_index, :].values.reshape(1, -1),
100
- n_neighbors=n_recommendations + 1 # +1 because the query anime itself is included
101
- )
102
- recommendations = []
103
- for i in range(1, len(distances.flatten())): # Start from 1 to exclude the query anime
104
- anime_title = self.anime_pivot.index[indices.flatten()[i]]
105
- distance = distances.flatten()[i]
106
- recommendations.append((anime_title, distance))
107
-
108
- # Fetch the recommended anime names (top n_recommendations)
109
- recommended_anime_titles = [rec[0] for rec in recommendations]
110
- logging.info(f"Top {n_recommendations} recommendations: {recommended_anime_titles}")
111
- filtered_df = self.df[self.df['name'].isin(recommended_anime_titles)].drop_duplicates(subset='name')
112
- logging.info(f"Shape of filtered df: {filtered_df.shape}")
113
- # Limit the results to `n_recommendations`
114
- filtered_df = filtered_df.head(n_recommendations)
115
-
116
- return pd.DataFrame({
117
- 'Anime Name': filtered_df['name'].values,
118
- 'Image URL': filtered_df['image url'].values,
119
- 'Genres': filtered_df['genres'].values,
120
- 'Rating': filtered_df['average_rating'].values
121
- })
122
-
123
- def get_user_based_recommendations(self, user_id, n_recommendations=10, knn_user_model=None):
124
- """
125
- Recommend anime for a given user based on similar users' preferences using the provided or trained KNN model.
126
-
127
- Args:
128
- user_id (int): The ID of the user.
129
- n_recommendations (int): Number of recommendations to return.
130
- knn_user_model (NearestNeighbors, optional): Pre-trained KNN model. Defaults to None.
131
-
132
- Returns:
133
- pd.DataFrame: A DataFrame containing recommended anime titles and related information.
134
- """
135
- # Use the provided model or fall back to self.knn_user_based
136
- knn_user_based = knn_user_model or self.knn_user_based
137
- if knn_user_based is None:
138
- raise ValueError("User-based KNN model is not provided or trained.")
139
-
140
- # Ensure the user exists in the pivot table
141
- user_id = float(user_id) # Convert to match pivot table index type
142
- if user_id not in self.user_pivot.index:
143
- return f"User ID '{user_id}' not found in the dataset."
144
-
145
- # Find the user's index in the pivot table
146
- user_idx = self.user_pivot.index.get_loc(user_id)
147
-
148
- # Use the KNN model to find the nearest neighbors
149
- distances, indices = knn_user_based.kneighbors(
150
- self.user_pivot.iloc[user_idx, :].values.reshape(1, -1),
151
- n_neighbors=n_recommendations + 1 # Include the user itself
152
- )
153
-
154
- # Get the list of anime the user has already rated
155
- user_rated_anime = set(self.user_pivot.columns[self.user_pivot.iloc[user_idx, :] > 0])
156
-
157
- # Collect all anime rated by the nearest neighbors
158
- all_neighbor_ratings = []
159
- for i in range(1, len(distances.flatten())): # Start from 1 to exclude the user itself
160
- neighbor_idx = indices.flatten()[i]
161
- neighbor_rated_anime = self.user_pivot.iloc[neighbor_idx, :]
162
- neighbor_ratings = neighbor_rated_anime[neighbor_rated_anime > 0]
163
- all_neighbor_ratings.extend(neighbor_ratings.index)
164
-
165
- # Count how frequently each anime is rated by neighbors
166
- anime_counter = Counter(all_neighbor_ratings)
167
-
168
- # Recommend anime not already rated by the user
169
- recommendations = [(anime, count) for anime, count in anime_counter.items() if anime not in user_rated_anime]
170
- recommendations.sort(key=lambda x: x[1], reverse=True) # Sort by frequency
171
-
172
- # Extract recommended anime names and their details
173
- recommended_anime_titles = [rec[0] for rec in recommendations[:n_recommendations]]
174
- filtered_df = self.df[self.df['name'].isin(recommended_anime_titles)].drop_duplicates(subset='name')
175
- logging.info(f"Shape of filtered df: {filtered_df.shape}")
176
- filtered_df = filtered_df.head(n_recommendations)
177
-
178
- return pd.DataFrame({
179
- 'Anime Name': filtered_df['name'].values,
180
- 'Image URL': filtered_df['image url'].values,
181
- 'Genres': filtered_df['genres'].values,
182
- 'Rating': filtered_df['average_rating'].values
183
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from anime_recommender.loggers.logging import logging
3
+ from anime_recommender.exception.exception import AnimeRecommendorException
4
+
5
+ from surprise import Reader, Dataset, SVD
6
+ from surprise.model_selection import cross_validate
7
+ from scipy.sparse import csr_matrix
8
+ from sklearn.neighbors import NearestNeighbors
9
+ from collections import Counter
10
+
11
class CollaborativeAnimeRecommender:
    """
    A collaborative filtering-based anime recommender system that supports:
    - Singular Value Decomposition (SVD)
    - Item-based KNN
    - User-based KNN

    Every ``get_*_recommendations`` method returns its rows in ranked order
    (best match first).
    """
    def __init__(self, df):
        """
        Initializes the recommender system with a given dataset.

        Args:
            df (pd.DataFrame): Ratings joined with anime metadata. The methods of
                this class read the columns 'user_id', 'anime_id', 'rating',
                'name', 'genres', 'image url' and 'average_rating'.

        Raises:
            AnimeRecommendorException: If data preparation fails.
        """
        try:
            logging.info("Initializing CollaborativeAnimeRecommender")
            self.df = df
            self.svd = None
            self.knn_item_based = None
            self.knn_user_based = None
            self.prepare_data()
        except Exception as e:
            raise AnimeRecommendorException(e)

    def prepare_data(self):
        """
        Prepares data for training.

        Drops duplicated rows, builds the Surprise dataset used by SVD and the
        two rating pivot tables (anime x user, user x anime) used by the KNN
        models. Missing ratings in the pivots are stored as 0.
        """
        try:
            self.df = self.df.drop_duplicates()
            reader = Reader(rating_scale=(1, 10))
            self.data = Dataset.load_from_df(self.df[['user_id', 'anime_id', 'rating']], reader)
            self.anime_pivot = self.df.pivot_table(index='name', columns='user_id', values='rating').fillna(0)
            self.user_pivot = self.df.pivot_table(index='user_id', columns='name', values='rating').fillna(0)
            logging.info("Data preparation completed...")
        except Exception as e:
            raise AnimeRecommendorException(e)

    def train_svd(self):
        """
        Trains the Singular Value Decomposition (SVD) model using Surprise.

        Runs a 5-fold cross-validation first (its scores are logged), then fits
        the model on the full training set.
        """
        try:
            logging.info("Training SVD model")
            self.svd = SVD()
            cv_results = cross_validate(self.svd, self.data, cv=5)
            # The scores used to be thrown away; log them so the CV run is useful.
            logging.info(
                "SVD 5-fold CV mean RMSE=%.4f, mean MAE=%.4f",
                cv_results['test_rmse'].mean(),
                cv_results['test_mae'].mean(),
            )
            trainset = self.data.build_full_trainset()
            self.svd.fit(trainset)
            logging.info("SVD model training completed")
        except Exception as e:
            raise AnimeRecommendorException(e)

    def train_knn_item_based(self):
        """
        Trains an item-based KNN model using cosine similarity.
        """
        try:
            logging.info("Training KNN model")
            item_user_matrix = csr_matrix(self.anime_pivot.values)
            self.knn_item_based = NearestNeighbors(metric='cosine', algorithm='brute')
            self.knn_item_based.fit(item_user_matrix)
            logging.info("KNN model training completed")
        except Exception as e:
            raise AnimeRecommendorException(e)

    def train_knn_user_based(self):
        """Train the KNN model for user-based recommendations."""
        try:
            logging.info("Training KNN model")
            user_item_matrix = csr_matrix(self.user_pivot.values)
            self.knn_user_based = NearestNeighbors(metric='cosine', algorithm='brute')
            self.knn_user_based.fit(user_item_matrix)
            logging.info("KNN model training completed")
        except Exception as e:
            raise AnimeRecommendorException(e)

    def print_unique_user_ids(self):
        """
        Logs and returns unique user IDs in the dataset.

        Returns:
            np.ndarray: Array of unique user IDs.
        """
        try:
            unique_user_ids = self.df['user_id'].unique()
            logging.info(f"Unique User IDs: {unique_user_ids}")
            return unique_user_ids
        except Exception as e:
            raise AnimeRecommendorException(e)

    def _ranked_details(self, key_column, ranked_keys, n):
        """
        Return one metadata row per key, in the order of ``ranked_keys``.

        A plain ``isin`` filter returns rows in DataFrame order and therefore
        discards the ranking; looking the keys up by label keeps it.

        Args:
            key_column (str): Column identifying an anime ('anime_id' or 'name').
            ranked_keys (list): Keys sorted best-first.
            n (int): Maximum number of rows to return.

        Returns:
            pd.DataFrame: At most ``n`` rows of ``self.df`` (first row seen per
            key), ordered like ``ranked_keys``.
        """
        catalogue = self.df.drop_duplicates(subset=key_column).set_index(key_column)
        present = [key for key in ranked_keys if key in catalogue.index]
        return catalogue.loc[present].head(n).reset_index()

    def get_svd_recommendations(self, user_id, n=10, svd_model=None)-> pd.DataFrame:
        """
        Generates anime recommendations using the trained SVD model.

        Args:
            user_id (int): The user ID for which recommendations are generated.
            n (int): Number of recommendations to return. Default is 10.
            svd_model (SVD, optional): Pretrained SVD model. Uses self.svd if not provided.

        Returns:
            pd.DataFrame: Recommended anime ('Anime Name', 'Genres', 'Image URL',
            'Rating'), highest predicted rating first. If ``user_id`` does not
            occur in the dataset a message string is returned instead (kept for
            backward compatibility with existing callers).

        Raises:
            AnimeRecommendorException: If no model is available or prediction fails.
        """
        try:
            # Use the provided SVD model or the trained self.svd model
            svd_model = svd_model if svd_model is not None else self.svd
            if svd_model is None:
                raise ValueError("SVD model is not provided or trained.")

            # Ensure user exists in the dataset
            if not (self.df['user_id'] == user_id).any():
                return f"User ID '{user_id}' not found in the dataset."

            # Predict a rating for every known anime and rank them
            anime_ids = self.df['anime_id'].unique()
            predictions = [(anime_id, svd_model.predict(user_id, anime_id).est) for anime_id in anime_ids]
            predictions.sort(key=lambda x: x[1], reverse=True)
            ranked_anime_ids = [anime_id for anime_id, _ in predictions[:n]]

            # Fetch the details in ranking order
            recommended_anime = self._ranked_details('anime_id', ranked_anime_ids, n)
            logging.info(f"Shape of recommended_anime: {recommended_anime.shape}")

            return pd.DataFrame({
                'Anime Name': recommended_anime['name'].values,
                'Genres': recommended_anime['genres'].values,
                'Image URL': recommended_anime['image url'].values,
                'Rating': recommended_anime['average_rating'].values
            })
        except Exception as e:
            raise AnimeRecommendorException(e)

    def get_item_based_recommendations(self, anime_name, n_recommendations=10, knn_item_model=None):
        """
        Get item-based recommendations for a given anime using a KNN model.

        Args:
            anime_name (str): The title of the anime for which recommendations are needed.
            n_recommendations (int): The number of recommendations to return. Defaults to 10.
            knn_item_model (NearestNeighbors): A trained KNN model. Defaults to None,
                in which case self.knn_item_based is used. It is assumed to have
                been fitted on the rows of ``self.anime_pivot``.

        Returns:
            pd.DataFrame: Recommended anime ('Anime Name', 'Image URL', 'Genres',
            'Rating'), nearest neighbour first. If ``anime_name`` is unknown a
            message string is returned instead.

        Raises:
            AnimeRecommendorException: If no model is available or the lookup fails.
        """
        try:
            # Use the provided model or fall back to self.knn_item_based
            knn_item_based = knn_item_model if knn_item_model is not None else self.knn_item_based
            if knn_item_based is None:
                raise ValueError("Item-based KNN model is not provided or trained.")

            # Ensure the anime name exists in the pivot table
            if anime_name not in self.anime_pivot.index:
                return f"Anime title '{anime_name}' not found in the dataset."

            query_index = self.anime_pivot.index.get_loc(anime_name)

            # Ask for one extra neighbour (the query itself), but never for more
            # rows than the pivot holds - kneighbors raises in that case.
            n_neighbors = max(1, min(n_recommendations + 1, len(self.anime_pivot)))
            _, indices = knn_item_based.kneighbors(
                self.anime_pivot.iloc[query_index, :].values.reshape(1, -1),
                n_neighbors=n_neighbors
            )

            # Drop the query by position instead of assuming it is neighbour 0:
            # with tied distances it may appear anywhere in the result.
            neighbour_positions = [pos for pos in indices.flatten() if pos != query_index]
            neighbour_positions = neighbour_positions[:max(n_recommendations, 0)]
            recommended_anime_titles = [self.anime_pivot.index[pos] for pos in neighbour_positions]
            logging.info(f"Top {n_recommendations} recommendations: {recommended_anime_titles}")

            filtered_df = self._ranked_details('name', recommended_anime_titles, n_recommendations)
            logging.info(f"Shape of filtered df: {filtered_df.shape}")

            return pd.DataFrame({
                'Anime Name': filtered_df['name'].values,
                'Image URL': filtered_df['image url'].values,
                'Genres': filtered_df['genres'].values,
                'Rating': filtered_df['average_rating'].values
            })
        except Exception as e:
            raise AnimeRecommendorException(e)

    def get_user_based_recommendations(self, user_id, n_recommendations=10, knn_user_model=None)-> pd.DataFrame:
        """
        Recommend anime for a given user based on similar users' preferences using the provided or trained KNN model.

        Anime are ranked by how many of the nearest neighbours rated them; titles
        the user already rated are excluded.

        Args:
            user_id (int): The ID of the user.
            n_recommendations (int): Number of recommendations to return.
            knn_user_model (NearestNeighbors): Pre-trained KNN model. Defaults to None,
                in which case self.knn_user_based is used. It is assumed to have
                been fitted on the rows of ``self.user_pivot``.

        Returns:
            pd.DataFrame: Recommended anime ('Anime Name', 'Image URL', 'Genres',
            'Rating'), most frequently rated by neighbours first. If ``user_id``
            is unknown a message string is returned instead.

        Raises:
            AnimeRecommendorException: If no model is available or the lookup fails.
        """
        try:
            # Use the provided model or fall back to self.knn_user_based
            knn_user_based = knn_user_model if knn_user_model is not None else self.knn_user_based
            if knn_user_based is None:
                raise ValueError("User-based KNN model is not provided or trained.")

            # Ensure the user exists in the pivot table
            user_id = float(user_id)
            if user_id not in self.user_pivot.index:
                return f"User ID '{user_id}' not found in the dataset."

            user_idx = self.user_pivot.index.get_loc(user_id)

            # One extra neighbour for the user itself, capped at the number of users.
            n_neighbors = max(1, min(n_recommendations + 1, len(self.user_pivot)))
            _, indices = knn_user_based.kneighbors(
                self.user_pivot.iloc[user_idx, :].values.reshape(1, -1),
                n_neighbors=n_neighbors
            )

            # Get the list of anime the user has already rated
            user_rated_anime = set(self.user_pivot.columns[self.user_pivot.iloc[user_idx, :] > 0])

            # Exclude the user by position (ties may move it away from slot 0)
            neighbour_positions = [pos for pos in indices.flatten() if pos != user_idx]
            neighbour_positions = neighbour_positions[:max(n_recommendations, 0)]

            # Collect all anime rated by the nearest neighbors
            all_neighbor_ratings = []
            for neighbor_idx in neighbour_positions:
                neighbor_rated_anime = self.user_pivot.iloc[neighbor_idx, :]
                all_neighbor_ratings.extend(neighbor_rated_anime[neighbor_rated_anime > 0].index)

            # Count how frequently each anime is rated by neighbors, most frequent first
            anime_counter = Counter(all_neighbor_ratings)
            recommended_anime_titles = [
                anime for anime, _ in anime_counter.most_common() if anime not in user_rated_anime
            ][:max(n_recommendations, 0)]

            filtered_df = self._ranked_details('name', recommended_anime_titles, n_recommendations)
            logging.info(f"Shape of filtered df: {filtered_df.shape}")

            return pd.DataFrame({
                'Anime Name': filtered_df['name'].values,
                'Image URL': filtered_df['image url'].values,
                'Genres': filtered_df['genres'].values,
                'Rating': filtered_df['average_rating'].values
            })
        except Exception as e:
            raise AnimeRecommendorException(e)
anime_recommender/model_trainer/content_based_modelling.py CHANGED
@@ -1,71 +1,73 @@
1
- import os
2
- import pandas as pd
3
- from sklearn.feature_extraction.text import TfidfVectorizer
4
- from sklearn.metrics.pairwise import cosine_similarity
5
- import joblib
6
-
7
-
8
- class ContentBasedRecommender:
9
- """
10
- A content-based recommender system using TF-IDF Vectorizer and Cosine Similarity.
11
- """
12
- def __init__(self, df):
13
- try:
14
- # Drop missing values from the DataFrame
15
- self.df = df.dropna()
16
-
17
- # Create a Series mapping anime names to their indices
18
- self.indices = pd.Series(self.df.index, index=self.df['name']).drop_duplicates()
19
-
20
- # Initialize and fit the TF-IDF Vectorizer on the 'genres' column
21
- self.tfv = TfidfVectorizer(
22
- min_df=3,
23
- strip_accents='unicode',
24
- analyzer='word',
25
- token_pattern=r'\w{1,}',
26
- ngram_range=(1, 3),
27
- stop_words='english'
28
- )
29
- self.tfv_matrix = self.tfv.fit_transform(self.df['genres'])
30
-
31
- self.cosine_sim = cosine_similarity(self.tfv_matrix, self.tfv_matrix)
32
-
33
- except Exception as e:
34
- raise e
35
- def save_model(self, model_path):
36
- """Save the trained model (TF-IDF and Cosine Similarity Matrix) to a file."""
37
- try:
38
- os.makedirs(os.path.dirname(model_path), exist_ok=True)
39
- with open(model_path, 'wb') as f:
40
- joblib.dump((self.tfv, self.cosine_sim), f)
41
-
42
- except Exception as e:
43
- raise e
44
- def get_rec_cosine(self, title, model_path, n_recommendations=5):
45
- """Get recommendations based on cosine similarity for a given anime title."""
46
- try:
47
- # Load the model (TF-IDF and cosine similarity matrix)
48
- with open(model_path, 'rb') as f:
49
- self.tfv, self.cosine_sim = joblib.load(f)
50
-
51
- # Check if the DataFrame is loaded
52
- if self.df is None:
53
- raise ValueError("The DataFrame is not loaded, cannot make recommendations.")
54
-
55
- if title not in self.indices.index:
56
- return f"Anime title '{title}' not found in the dataset."
57
-
58
- idx = self.indices[title]
59
- cosinesim_scores = list(enumerate(self.cosine_sim[idx]))
60
- cosinesim_scores = sorted(cosinesim_scores, key=lambda x: x[1], reverse=True)[1:n_recommendations + 1]
61
- anime_indices = [i[0] for i in cosinesim_scores]
62
-
63
- return pd.DataFrame({
64
- 'Anime name': self.df['name'].iloc[anime_indices].values,
65
- 'Image URL': self.df['image url'].iloc[anime_indices].values,
66
- 'Genres': self.df['genres'].iloc[anime_indices].values,
67
- 'Rating': self.df['average_rating'].iloc[anime_indices].values
68
- })
69
- except Exception as e:
70
- raise e
 
 
71
 
 
1
+ import os
2
+ import pandas as pd
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ import joblib
6
+ from anime_recommender.loggers.logging import logging
7
+ from anime_recommender.exception.exception import AnimeRecommendorException
8
+
9
class ContentBasedRecommender:
    """
    A content-based recommender system using TF-IDF Vectorizer and Cosine Similarity.

    Similarity is computed on the 'genres' column only; row ``i`` of the
    similarity matrix corresponds to row ``i`` of ``self.df``.
    """
    def __init__(self, df):
        """
        Fit the TF-IDF vectorizer on the genres and build the similarity matrix.

        Args:
            df (pd.DataFrame): Anime metadata. The class reads the columns
                'name', 'genres', 'image url' and 'average_rating'.

        Raises:
            AnimeRecommendorException: If fitting fails.
        """
        try:
            # Renumber after dropping incomplete rows: the similarity matrix is
            # addressed by position, so index labels must equal row positions.
            self.df = df.dropna().reset_index(drop=True)
            # Map each anime name to its row position, keeping the first row
            # when a name occurs more than once so a lookup yields one scalar.
            name_to_position = pd.Series(self.df.index, index=self.df['name'])
            self.indices = name_to_position[~name_to_position.index.duplicated(keep='first')]
            # Initialize and fit the TF-IDF Vectorizer on the 'genres' column
            self.tfv = TfidfVectorizer(
                min_df=3,
                strip_accents='unicode',
                analyzer='word',
                token_pattern=r'\w{1,}',
                ngram_range=(1, 3),
                stop_words='english'
            )
            self.tfv_matrix = self.tfv.fit_transform(self.df['genres'])
            self.cosine_sim = cosine_similarity(self.tfv_matrix, self.tfv_matrix)
        except Exception as e:
            raise AnimeRecommendorException(e)

    def save_model(self, model_path):
        """
        Save the trained model (TF-IDF and Cosine Similarity Matrix) to a file.

        Args:
            model_path (str): Destination file. Its parent directory is created
                when the path contains one.

        Raises:
            AnimeRecommendorException: If the file cannot be written.
        """
        try:
            logging.info(f"Saving model to {model_path}")
            target_dir = os.path.dirname(model_path)
            # A bare file name has no directory part; makedirs('') would fail.
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
            with open(model_path, 'wb') as f:
                joblib.dump((self.tfv, self.cosine_sim), f)
            logging.info("Content recommender Model saved successfully")
        except Exception as e:
            raise AnimeRecommendorException(e)

    def get_rec_cosine(self, title, model_path, n_recommendations=5):
        """
        Get recommendations based on cosine similarity for a given anime title.

        Args:
            title (str): Anime name to find similar titles for.
            model_path (str): File written by ``save_model``; it is loaded on
                every call and replaces ``self.tfv`` / ``self.cosine_sim``.
            n_recommendations (int): Number of similar anime to return.

        Returns:
            pd.DataFrame: Similar anime ('Anime name', 'Image URL', 'Genres',
            'Rating'), most similar first and never including ``title``
            itself. If ``title`` is unknown a message string is returned
            instead.

        Raises:
            AnimeRecommendorException: If the model cannot be loaded or the
                lookup fails.
        """
        try:
            logging.info(f"Loading model from {model_path}")
            # SECURITY: joblib files are pickle-based - only load model files
            # produced by this application, never untrusted ones.
            with open(model_path, 'rb') as f:
                self.tfv, self.cosine_sim = joblib.load(f)
            logging.info("Model loaded successfully")
            # Check if the DataFrame is loaded
            if self.df is None:
                logging.error("The DataFrame is not loaded, cannot make recommendations.")
                raise ValueError("The DataFrame is not loaded, cannot make recommendations.")

            if title not in self.indices.index:
                logging.warning(f"Anime title '{title}' not found in dataset")
                return f"Anime title '{title}' not found in the dataset."

            idx = int(self.indices[title])
            # Exclude the query by position rather than skipping the first
            # sorted entry: anime with identical genres tie at the top score,
            # so the query is not guaranteed to sort first.
            cosinesim_scores = [
                (position, score)
                for position, score in enumerate(self.cosine_sim[idx])
                if position != idx
            ]
            cosinesim_scores.sort(key=lambda x: x[1], reverse=True)
            anime_indices = [position for position, _ in cosinesim_scores[:max(n_recommendations, 0)]]
            logging.info("Recommendations generated successfully")
            return pd.DataFrame({
                'Anime name': self.df['name'].iloc[anime_indices].values,
                'Image URL': self.df['image url'].iloc[anime_indices].values,
                'Genres': self.df['genres'].iloc[anime_indices].values,
                'Rating': self.df['average_rating'].iloc[anime_indices].values
            })
        except Exception as e:
            raise AnimeRecommendorException(e)
73
 
anime_recommender/model_trainer/top_anime_filtering.py CHANGED
@@ -1,93 +1,104 @@
1
- import sys
2
- import numpy as np
3
- import pandas as pd
4
- from anime_recommender.exception.exception import AnimeRecommendorException
5
-
6
- class PopularityBasedFiltering:
7
- def __init__(self, df):
8
- try:
9
- self.df = df
10
- self.df['average_rating'] = pd.to_numeric(self.df['average_rating'], errors='coerce')
11
- self.df['average_rating'].fillna(self.df['average_rating'].median())
12
- except Exception as e:
13
- raise AnimeRecommendorException(e, sys)
14
-
15
- def popular_animes(self, n=10):
16
- sorted_df = self.df.sort_values(by=['popularity'], ascending=True)
17
- top_n_anime = sorted_df.head(n)
18
- return pd.DataFrame({
19
- 'Anime name': top_n_anime['name'].values,
20
- 'Image URL': top_n_anime['image url'].values,
21
- 'Genres': top_n_anime['genres'].values,
22
- 'Rating': top_n_anime['average_rating'].values
23
- })
24
-
25
- def top_ranked_animes(self, n=10):
26
- self.df['rank'] = self.df['rank'].replace('UNKNOWN', np.nan).astype(float)
27
- df_filtered = self.df[self.df['rank'] > 1]
28
- sorted_df = df_filtered.sort_values(by=['rank'], ascending=True)
29
- top_n_anime = sorted_df.head(n)
30
- return pd.DataFrame({
31
- 'Anime name': top_n_anime['name'].values,
32
- 'Image URL': top_n_anime['image url'].values,
33
- 'Genres': top_n_anime['genres'].values,
34
- 'Rating': top_n_anime['average_rating'].values
35
- })
36
-
37
- def overall_top_rated_animes(self, n=10):
38
- sorted_df = self.df.sort_values(by=['average_rating'], ascending=False)
39
- top_n_anime = sorted_df.head(n)
40
- return pd.DataFrame({
41
- 'Anime name': top_n_anime['name'].values,
42
- 'Image URL': top_n_anime['image url'].values,
43
- 'Genres': top_n_anime['genres'].values,
44
- 'Rating': top_n_anime['average_rating'].values
45
- })
46
-
47
- def favorite_animes(self, n=10):
48
- sorted_df = self.df.sort_values(by=['favorites'], ascending=False)
49
- top_n_anime = sorted_df.head(n)
50
- return pd.DataFrame({
51
- 'Anime name': top_n_anime['name'].values,
52
- 'Image URL': top_n_anime['image url'].values,
53
- 'Genres': top_n_anime['genres'].values,
54
- 'Rating': top_n_anime['average_rating'].values
55
- })
56
-
57
- def top_animes_members(self, n=10):
58
- sorted_df = self.df.sort_values(by=['members'], ascending=False)
59
- top_n_anime = sorted_df.head(n)
60
- return pd.DataFrame({
61
- 'Anime name': top_n_anime['name'].values,
62
- 'Image URL': top_n_anime['image url'].values,
63
- 'Genres': top_n_anime['genres'].values,
64
- 'Rating': top_n_anime['average_rating'].values
65
- })
66
-
67
- def popular_anime_among_members(self, n=10):
68
- sorted_df = self.df.sort_values(by=['members', 'average_rating'], ascending=[False, False]).drop_duplicates(subset='name')
69
- popular_animes = sorted_df.head(n)
70
- return pd.DataFrame({
71
- 'Anime name': popular_animes['name'].values,
72
- 'Image URL': popular_animes['image url'].values,
73
- 'Genres': popular_animes['genres'].values,
74
- 'Rating': popular_animes['average_rating'].values
75
- })
76
-
77
- def top_avg_rated(self, n=10):
78
- self.df['average_rating'] = pd.to_numeric(self.df['average_rating'], errors='coerce')
79
-
80
- # Replace NaN values with the median
81
- median_rating = self.df['average_rating'].median()
82
- self.df['average_rating'].fillna(median_rating)
83
- # Select top N animes by average rating
84
- top_animes = (
85
- self.df.drop_duplicates(subset='name')
86
- .nlargest(n, 'average_rating')[['name', 'average_rating', 'image url', 'genres']]
87
- )
88
- return pd.DataFrame({
89
- 'Anime name': top_animes['name'].values,
90
- 'Image URL': top_animes['image url'].values,
91
- 'Genres': top_animes['genres'].values,
92
- 'Rating': top_animes['average_rating'].values
 
 
 
 
 
 
 
 
 
 
 
93
  })
 
1
+ import sys
2
+ import numpy as np
3
+ import pandas as pd
4
+ from anime_recommender.loggers.logging import logging
5
+ from anime_recommender.exception.exception import AnimeRecommendorException
6
+
7
class PopularityBasedFiltering:
    """
    A recommender system that filters popular animes based on different criteria such as popularity, rank,
    average rating, number of members, and favorites.

    Every public method returns a DataFrame with the columns
    'Anime name', 'Image URL', 'Genres' and 'Rating'.
    """
    def __init__(self, df):
        """
        Initialize the PopularityBasedFiltering class with a DataFrame.

        Args:
            df (pd.DataFrame): Anime metadata. The class reads the columns
                'name', 'image url', 'genres', 'average_rating', 'popularity',
                'rank', 'favorites' and 'members'. The frame is copied, so the
                caller's object is never modified.

        Raises:
            AnimeRecommendorException: If the rating column cannot be prepared.
        """
        try:
            logging.info("Initializing PopularityBasedFiltering class")
            # Private copy: the cleaning below must not leak into the caller's frame.
            self.df = df.copy()
            self._impute_average_rating()
        except Exception as e:
            logging.error("Error initializing PopularityBasedFiltering: %s", str(e))
            raise AnimeRecommendorException(e, sys)

    def _impute_average_rating(self):
        """Coerce 'average_rating' to numbers and replace missing values with the median."""
        ratings = pd.to_numeric(self.df['average_rating'], errors='coerce')
        # fillna returns a new Series; the result has to be stored back.
        self.df['average_rating'] = ratings.fillna(ratings.median())

    def _top_n(self, frame, by, ascending, n):
        """Sort ``frame`` by ``by`` and return the first ``n`` rows in output format."""
        return self._format_output(frame.sort_values(by=by, ascending=ascending).head(n))

    def popular_animes(self, n=10):
        """
        Get the top N most popular animes (smallest 'popularity' value first).
        """
        logging.info("Fetching top %d most popular animes", n)
        return self._top_n(self.df, ['popularity'], True, n)

    def top_ranked_animes(self, n=10):
        """
        Get the top N ranked animes (smallest 'rank' value first).

        Non-numeric ranks such as 'UNKNOWN' are treated as missing and skipped.
        """
        logging.info("Fetching top %d ranked animes", n)
        self.df['rank'] = pd.to_numeric(self.df['rank'], errors='coerce').astype(float)
        # NOTE(review): '> 1' also drops the anime ranked exactly 1; kept as in
        # the original - confirm whether '>= 1' was intended.
        df_filtered = self.df[self.df['rank'] > 1]
        return self._top_n(df_filtered, ['rank'], True, n)

    def overall_top_rated_animes(self, n=10):
        """
        Get the top N highest-rated animes.
        """
        logging.info("Fetching top %d highest-rated animes", n)
        return self._top_n(self.df, ['average_rating'], False, n)

    def favorite_animes(self, n=10):
        """
        Get the top N most favorited animes.
        """
        logging.info("Fetching top %d most favorited animes", n)
        return self._top_n(self.df, ['favorites'], False, n)

    def top_animes_members(self, n=10):
        """
        Get the top N animes based on the number of members.
        """
        logging.info("Fetching top %d animes based on number of members", n)
        return self._top_n(self.df, ['members'], False, n)

    def popular_anime_among_members(self, n=10):
        """
        Get the top N animes popular among members based on the highest number of members and ratings.

        Rows are ordered by members, then rating, and each name appears once.
        """
        logging.info("Fetching top %d popular animes among members", n)
        sorted_df = self.df.sort_values(by=['members', 'average_rating'], ascending=[False, False]).drop_duplicates(subset='name')
        return self._format_output(sorted_df.head(n))

    def top_avg_rated(self, n=10):
        """
        Get the top N highest-rated animes after handling missing values.

        Unlike ``overall_top_rated_animes`` each name appears at most once.
        """
        logging.info("Fetching top %d highest average-rated animes", n)
        # Re-run the cleaning in case self.df was modified after construction.
        self._impute_average_rating()
        top_animes = (
            self.df.drop_duplicates(subset='name')
            .nlargest(n, 'average_rating')[['name', 'average_rating', 'image url', 'genres']]
        )
        return self._format_output(top_animes)

    def _format_output(self, anime_df):
        """
        Format the output as a DataFrame with selected anime attributes.
        """
        return pd.DataFrame({
            'Anime name': anime_df['name'].values,
            'Image URL': anime_df['image url'].values,
            'Genres': anime_df['genres'].values,
            'Rating': anime_df['average_rating'].values
        })
anime_recommender/pipelines/training_pipeline.py CHANGED
@@ -1,152 +1,152 @@
1
- import sys
2
- from anime_recommender.loggers.logging import logging
3
- from anime_recommender.exception.exception import AnimeRecommendorException
4
-
5
- from anime_recommender.source.data_ingestion import DataIngestion
6
- from anime_recommender.source.data_transformation import DataTransformation
7
- from anime_recommender.source.collaborative_recommender import CollaborativeModelTrainer
8
- from anime_recommender.source.content_based_recommender import ContentBasedModelTrainer
9
- from anime_recommender.source.top_anime_recommenders import PopularityBasedRecommendor
10
- from anime_recommender.entity.config_entity import (
11
- TrainingPipelineConfig,
12
- DataIngestionConfig,
13
- DataTransformationConfig,
14
- CollaborativeModelConfig,
15
- ContentBasedModelConfig,
16
- )
17
- from anime_recommender.entity.artifact_entity import (
18
- DataIngestionArtifact,
19
- DataTransformationArtifact,
20
- CollaborativeModelArtifact,
21
- ContentBasedModelArtifact,
22
- )
23
-
24
class TrainingPipeline:
    """
    Runs the anime recommender training stages in sequence: data ingestion,
    data transformation, collaborative and content-based model training, and
    popularity-based filtering.
    """
    def __init__(self):
        """
        Create the pipeline-wide configuration every stage config is built from.
        """
        self.training_pipeline_config = TrainingPipelineConfig()

    def start_data_ingestion(self) -> DataIngestionArtifact:
        """
        Run the data ingestion stage.
        Returns:
            DataIngestionArtifact: Result of ``DataIngestion.ingest_data``.
        Raises:
            AnimeRecommendorException: If any step of the stage fails.
        """
        try:
            logging.info("Initiating Data Ingestion...")
            ingestion = DataIngestion(
                data_ingestion_config=DataIngestionConfig(self.training_pipeline_config)
            )
            artifact = ingestion.ingest_data()
            logging.info(f"Data Ingestion completed: {artifact}")
            return artifact
        except Exception as exc:
            raise AnimeRecommendorException(exc, sys)

    def start_data_transformation(self, data_ingestion_artifact: DataIngestionArtifact) -> DataTransformationArtifact:
        """
        Run the data transformation stage on the ingested data.
        Args:
            data_ingestion_artifact: Output of ``start_data_ingestion``.
        Returns:
            DataTransformationArtifact: Result of ``initiate_data_transformation``.
        Raises:
            AnimeRecommendorException: If any step of the stage fails.
        """
        try:
            logging.info("Initiating Data Transformation...")
            transformer = DataTransformation(
                data_ingestion_artifact=data_ingestion_artifact,
                data_transformation_config=DataTransformationConfig(self.training_pipeline_config),
            )
            artifact = transformer.initiate_data_transformation()
            logging.info(f"Data Transformation completed: {artifact}")
            return artifact
        except Exception as exc:
            raise AnimeRecommendorException(exc, sys)

    def start_collaborative_model_training(self, data_transformation_artifact: DataTransformationArtifact) -> CollaborativeModelArtifact:
        """
        Train the collaborative filtering model (requested with model_type='user_knn').
        Args:
            data_transformation_artifact: Output of ``start_data_transformation``.
        Returns:
            CollaborativeModelArtifact: Result of ``initiate_model_trainer``.
        Raises:
            AnimeRecommendorException: If any step of the stage fails.
        """
        try:
            logging.info("Initiating Collaborative Model Training...")
            trainer = CollaborativeModelTrainer(
                collaborative_model_trainer_config=CollaborativeModelConfig(self.training_pipeline_config),
                data_transformation_artifact=data_transformation_artifact,
            )
            artifact = trainer.initiate_model_trainer(model_type='user_knn')
            logging.info(f"Collaborative Model Training completed: {artifact}")
            return artifact
        except Exception as exc:
            raise AnimeRecommendorException(exc, sys)

    def start_content_based_model_training(self, data_ingestion_artifact: DataIngestionArtifact) -> ContentBasedModelArtifact:
        """
        Train the content-based filtering model.
        Args:
            data_ingestion_artifact: Output of ``start_data_ingestion``.
        Returns:
            ContentBasedModelArtifact: Result of ``initiate_model_trainer``.
        Raises:
            AnimeRecommendorException: If any step of the stage fails.
        """
        try:
            logging.info("Initiating Content-Based Model Training...")
            trainer = ContentBasedModelTrainer(
                content_based_model_trainer_config=ContentBasedModelConfig(self.training_pipeline_config),
                data_ingestion_artifact=data_ingestion_artifact,
            )
            artifact = trainer.initiate_model_trainer()
            logging.info(f"Content-Based Model Training completed: {artifact}")
            return artifact
        except Exception as exc:
            raise AnimeRecommendorException(exc, sys)

    def start_popularity_based_filtering(self, data_ingestion_artifact: DataIngestionArtifact):
        """
        Produce popularity-based recommendations (requested with filter_type='popular_animes').
        Args:
            data_ingestion_artifact: Output of ``start_data_ingestion``.
        Returns:
            Whatever ``PopularityBasedRecommendor.initiate_model_trainer`` returns.
        Raises:
            AnimeRecommendorException: If any step of the stage fails.
        """
        try:
            logging.info("Initiating Popularity-Based Filtering...")
            recommender = PopularityBasedRecommendor(data_ingestion_artifact=data_ingestion_artifact)
            result = recommender.initiate_model_trainer(filter_type='popular_animes')
            logging.info("Popularity-Based Filtering completed.")
            return result
        except Exception as exc:
            raise AnimeRecommendorException(exc, sys)

    def run_pipeline(self):
        """
        Execute every stage in order; the stage results are not returned.
        Raises:
            AnimeRecommendorException: If any stage fails.
        """
        try:
            # Ingestion feeds transformation, content-based training and popularity filtering
            ingestion_artifact = self.start_data_ingestion()

            # Transformation feeds collaborative training
            transformation_artifact = self.start_data_transformation(ingestion_artifact)

            self.start_collaborative_model_training(transformation_artifact)
            self.start_content_based_model_training(ingestion_artifact)
            self.start_popularity_based_filtering(ingestion_artifact)

            logging.info("Training Pipeline executed successfully.")
        except Exception as exc:
            raise AnimeRecommendorException(exc, sys)
144
-
145
-
146
if __name__ == "__main__":
    # Script entry point: run the whole pipeline, log any failure, then
    # re-raise it wrapped in the project exception type.
    try:
        pipeline = TrainingPipeline()
        pipeline.run_pipeline()
    except Exception as exc:
        logging.error(f"Pipeline execution failed: {str(exc)}")
        raise AnimeRecommendorException(exc, sys)
 
1
+ import sys
2
+ from anime_recommender.loggers.logging import logging
3
+ from anime_recommender.exception.exception import AnimeRecommendorException
4
+
5
+ from anime_recommender.components.data_ingestion import DataIngestion
6
+ from anime_recommender.components.data_transformation import DataTransformation
7
+ from anime_recommender.components.collaborative_recommender import CollaborativeModelTrainer
8
+ from anime_recommender.components.content_based_recommender import ContentBasedModelTrainer
9
+ from anime_recommender.components.top_anime_recommenders import PopularityBasedRecommendor
10
+ from anime_recommender.entity.config_entity import (
11
+ TrainingPipelineConfig,
12
+ DataIngestionConfig,
13
+ DataTransformationConfig,
14
+ CollaborativeModelConfig,
15
+ ContentBasedModelConfig,
16
+ )
17
+ from anime_recommender.entity.artifact_entity import (
18
+ DataIngestionArtifact,
19
+ DataTransformationArtifact,
20
+ CollaborativeModelArtifact,
21
+ ContentBasedModelArtifact,
22
+ )
23
+
24
class TrainingPipeline:
    """
    Orchestrates the anime recommender training pipeline: data ingestion,
    data transformation, collaborative and content-based model training,
    and popularity-based filtering.

    Every stage logs its start and completion and re-raises any failure as an
    AnimeRecommendorException chained to the original error.
    """

    def __init__(self):
        """
        Initialize the TrainingPipeline with the configuration shared by all stages.
        """
        self.training_pipeline_config = TrainingPipelineConfig()

    def start_data_ingestion(self) -> DataIngestionArtifact:
        """
        Starts the data ingestion process.

        Returns:
            DataIngestionArtifact: Contains information about ingested data.

        Raises:
            AnimeRecommendorException: If any step of the ingestion fails.
        """
        try:
            logging.info("Initiating Data Ingestion...")
            data_ingestion_config = DataIngestionConfig(self.training_pipeline_config)
            data_ingestion = DataIngestion(data_ingestion_config=data_ingestion_config)
            data_ingestion_artifact = data_ingestion.ingest_data()
            logging.info(f"Data Ingestion completed: {data_ingestion_artifact}")
            return data_ingestion_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    def start_data_transformation(self, data_ingestion_artifact: DataIngestionArtifact) -> DataTransformationArtifact:
        """
        Starts the data transformation process.

        Args:
            data_ingestion_artifact (DataIngestionArtifact): Output of the ingestion stage.

        Returns:
            DataTransformationArtifact: Contains transformed data.

        Raises:
            AnimeRecommendorException: If any step of the transformation fails.
        """
        try:
            logging.info("Initiating Data Transformation...")
            data_transformation_config = DataTransformationConfig(self.training_pipeline_config)
            data_transformation = DataTransformation(
                data_ingestion_artifact=data_ingestion_artifact,
                data_transformation_config=data_transformation_config
            )
            data_transformation_artifact = data_transformation.initiate_data_transformation()
            logging.info(f"Data Transformation completed: {data_transformation_artifact}")
            return data_transformation_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    def start_collaborative_model_training(
        self,
        data_transformation_artifact: DataTransformationArtifact,
        model_type: str = 'user_knn',
    ) -> CollaborativeModelArtifact:
        """
        Starts collaborative filtering model training.

        Args:
            data_transformation_artifact (DataTransformationArtifact): Output of the transformation stage.
            model_type (str): Forwarded unchanged to
                CollaborativeModelTrainer.initiate_model_trainer, which defines the
                accepted values. Defaults to 'user_knn'.

        Returns:
            CollaborativeModelArtifact: Trained collaborative model artifact.

        Raises:
            AnimeRecommendorException: If any step of the training fails.
        """
        try:
            logging.info("Initiating Collaborative Model Training...")
            collaborative_model_config = CollaborativeModelConfig(self.training_pipeline_config)
            collaborative_model_trainer = CollaborativeModelTrainer(
                collaborative_model_trainer_config=collaborative_model_config,
                data_transformation_artifact=data_transformation_artifact
            )
            collaborative_model_trainer_artifact = collaborative_model_trainer.initiate_model_trainer(model_type=model_type)
            logging.info(f"Collaborative Model Training completed: {collaborative_model_trainer_artifact}")
            return collaborative_model_trainer_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    def start_content_based_model_training(self, data_ingestion_artifact: DataIngestionArtifact) -> ContentBasedModelArtifact:
        """
        Starts content-based filtering model training.

        Args:
            data_ingestion_artifact (DataIngestionArtifact): Output of the ingestion stage.

        Returns:
            ContentBasedModelArtifact: Trained content-based model artifact.

        Raises:
            AnimeRecommendorException: If any step of the training fails.
        """
        try:
            logging.info("Initiating Content-Based Model Training...")
            content_based_model_config = ContentBasedModelConfig(self.training_pipeline_config)
            content_based_model_trainer = ContentBasedModelTrainer(
                content_based_model_trainer_config=content_based_model_config,
                data_ingestion_artifact=data_ingestion_artifact
            )
            content_based_model_trainer_artifact = content_based_model_trainer.initiate_model_trainer()
            logging.info(f"Content-Based Model Training completed: {content_based_model_trainer_artifact}")
            return content_based_model_trainer_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    def start_popularity_based_filtering(
        self,
        data_ingestion_artifact: DataIngestionArtifact,
        filter_type: str = 'popular_animes',
    ):
        """
        Generates popularity-based recommendations.

        Args:
            data_ingestion_artifact (DataIngestionArtifact): Output of the ingestion stage.
            filter_type (str): Forwarded unchanged to
                PopularityBasedRecommendor.initiate_model_trainer, which defines the
                accepted values. Defaults to 'popular_animes'.

        Returns:
            Whatever PopularityBasedRecommendor.initiate_model_trainer returns for
            the requested filter.

        Raises:
            AnimeRecommendorException: If the filtering fails.
        """
        try:
            logging.info("Initiating Popularity-Based Filtering...")
            filtering = PopularityBasedRecommendor(data_ingestion_artifact=data_ingestion_artifact)
            recommendations = filtering.initiate_model_trainer(filter_type=filter_type)
            logging.info("Popularity-Based Filtering completed.")
            return recommendations
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    def run_pipeline(self):
        """
        Executes the entire training pipeline with the default model and filter types.

        Ingestion runs first; transformation and collaborative training follow in
        sequence, while content-based training and popularity filtering work
        directly from the ingestion artifact.

        Raises:
            AnimeRecommendorException: If any stage fails.
        """
        try:
            # Data Ingestion
            data_ingestion_artifact = self.start_data_ingestion()

            # Data Transformation
            data_transformation_artifact = self.start_data_transformation(data_ingestion_artifact)

            # Collaborative Model Training
            self.start_collaborative_model_training(data_transformation_artifact)

            # Content-Based Model Training
            self.start_content_based_model_training(data_ingestion_artifact)

            # Popularity-Based Filtering
            self.start_popularity_based_filtering(data_ingestion_artifact)

            logging.info("Training Pipeline executed successfully.")
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e
144
+
145
+
146
# Script entry point: run the full training pipeline once.
if __name__ == "__main__":
    try:
        pipeline = TrainingPipeline()
        pipeline.run_pipeline()
    except Exception as e:
        logging.error(f"Pipeline execution failed: {str(e)}")
        # Chain the cause so the original traceback stays attached to the wrapper.
        raise AnimeRecommendorException(e, sys) from e
anime_recommender/utils/main_utils/utils.py CHANGED
@@ -1,47 +1,89 @@
1
- import os
2
- import sys
3
- import time
4
- import pandas as pd
5
- import joblib
6
- from anime_recommender.loggers.logging import logging
7
- from anime_recommender.exception.exception import AnimeRecommendorException
8
- from anime_recommender.constant import *
9
-
10
- def export_data_to_dataframe(dataframe: pd.DataFrame, file_path: str) -> pd.DataFrame:
11
- try:
12
- logging.info(f"Saving DataFrame to file: {file_path}")
13
- dir_path = os.path.dirname(file_path)
14
- os.makedirs(dir_path, exist_ok=True)
15
- dataframe.to_csv(file_path, index=False, header=True)
16
- logging.info(f"DataFrame saved successfully to {file_path}.")
17
- return dataframe
18
- except Exception as e:
19
- raise AnimeRecommendorException(e, sys)
20
-
21
- def load_csv_data(file_path: str) -> pd.DataFrame:
22
- try:
23
- df = pd.read_csv(file_path)
24
- return df
25
- except Exception as e:
26
- raise AnimeRecommendorException(e, sys) from e
27
-
28
- def save_model(model: object,file_path: str ) -> None:
29
- try:
30
- logging.info("Entered the save_model method of Main utils class")
31
- os.makedirs(os.path.dirname(file_path), exist_ok=True)
32
- with open(file_path, "wb") as file_obj:
33
- joblib.dump(model, file_obj)
34
- logging.info("Completed saving the model object.")
35
- except Exception as e:
36
- raise AnimeRecommendorException(e, sys) from e
37
-
38
- def load_object(file_path:str)-> object:
39
- try:
40
- if not os.path.exists(file_path):
41
- raise Exception(f"The file: {file_path} is not exists")
42
- with open(file_path,"rb") as file_obj:
43
- print(file_obj)
44
- return joblib.load(file_obj)
45
- except Exception as e:
46
- raise AnimeRecommendorException(e,sys) from e
47
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import pandas as pd
4
+ import joblib
5
+ from anime_recommender.loggers.logging import logging
6
+ from anime_recommender.exception.exception import AnimeRecommendorException
7
+ from anime_recommender.constant import *
8
+
9
def export_data_to_dataframe(dataframe: pd.DataFrame, file_path: str) -> pd.DataFrame:
    """
    Saves a given Pandas DataFrame to a CSV file.

    The parent directory of ``file_path`` is created when it is missing. The CSV
    is written with a header row and without the index.

    Args:
        dataframe (pd.DataFrame): The DataFrame to be saved.
        file_path (str): The file path where the DataFrame should be stored.

    Returns:
        pd.DataFrame: The same DataFrame that was saved.

    Raises:
        AnimeRecommendorException: If the directory cannot be created or the
            file cannot be written.
    """
    try:
        logging.info(f"Saving DataFrame to file: {file_path}")
        dir_path = os.path.dirname(file_path)
        # A bare file name has an empty directory part, and os.makedirs("")
        # raises FileNotFoundError, so only create a directory when one is named.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        dataframe.to_csv(file_path, index=False, header=True)
        logging.info(f"DataFrame saved successfully to {file_path}.")
        return dataframe
    except Exception as e:
        logging.error(f"Error saving DataFrame to {file_path}: {e}")
        raise AnimeRecommendorException(e, sys) from e
30
+
31
def load_csv_data(file_path: str) -> pd.DataFrame:
    """
    Reads the CSV file at ``file_path`` and returns its contents as a DataFrame.

    Args:
        file_path (str): Location of the CSV file to read.

    Returns:
        pd.DataFrame: The parsed contents of the file.

    Raises:
        AnimeRecommendorException: If the file cannot be read or parsed.
    """
    try:
        logging.info(f"Loading CSV data from file: {file_path}")
        frame = pd.read_csv(file_path)
        logging.info("CSV file loaded successfully.")
        return frame
    except Exception as exc:
        logging.error(f"Error loading CSV file {file_path}: {exc}")
        raise AnimeRecommendorException(exc, sys) from exc
49
+
50
def save_model(model: object, file_path: str) -> None:
    """
    Saves a machine learning model to a file using joblib.

    The parent directory of ``file_path`` is created when it is missing.

    Args:
        model (object): The model object to be saved.
        file_path (str): The file path where the model should be stored.

    Raises:
        AnimeRecommendorException: If the directory cannot be created or the
            model cannot be serialized or written.
    """
    try:
        logging.info("Entered the save_model method.")
        dir_path = os.path.dirname(file_path)
        # A bare file name has an empty directory part, and os.makedirs("")
        # raises FileNotFoundError, so only create a directory when one is named.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        with open(file_path, "wb") as file_obj:
            joblib.dump(model, file_obj)
        logging.info(f"Model saved successfully to {file_path}.")
    except Exception as e:
        logging.error(f"Error saving model to {file_path}: {e}")
        raise AnimeRecommendorException(e, sys) from e
67
+
68
def load_object(file_path: str) -> object:
    """
    Loads a model or object from a file using joblib.

    Args:
        file_path (str): The file path of the saved model.

    Returns:
        object: The loaded model.

    Raises:
        AnimeRecommendorException: If the file does not exist or cannot be
            deserialized. The underlying error is attached as the cause.
    """
    try:
        logging.info(f"Attempting to load object from {file_path}")
        if not os.path.exists(file_path):
            # The handler below logs this message once; logging it here as well
            # would record the same failure twice.
            raise FileNotFoundError(f"The file: {file_path} does not exist.")
        # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
        # Only load files that come from a trusted source.
        with open(file_path, "rb") as file_obj:
            loaded_object = joblib.load(file_obj)
        # Report success only after deserialization has actually completed.
        logging.info("Object loaded successfully.")
        return loaded_object
    except Exception as e:
        logging.error(f"Error loading object from {file_path}: {e}")
        raise AnimeRecommendorException(e, sys) from e
app.py CHANGED
@@ -1,233 +1,233 @@
1
- import pandas as pd
2
- import streamlit as st
3
- from anime_recommender.model_trainer.content_based_modelling import ContentBasedRecommender
4
- from anime_recommender.model_trainer.collaborative_modelling import CollaborativeAnimeRecommender
5
- from anime_recommender.model_trainer.top_anime_filtering import PopularityBasedFiltering
6
- import joblib
7
- from anime_recommender.constant import *
8
- from huggingface_hub import hf_hub_download
9
- from datasets import load_dataset
10
-
11
- st.set_page_config(page_title="Anime Recommendation System", layout="wide")
12
-
13
- if "anime_data" not in st.session_state or "anime_user_ratings" not in st.session_state:
14
- # Load datasets from Hugging Face (assuming no splits)
15
- animedataset = load_dataset(ANIME_FILE_PATH, split=None)
16
- mergeddataset = load_dataset(ANIMEUSERRATINGS_FILE_PATH, split=None)
17
-
18
- # Convert the dataset to Pandas DataFrame
19
- st.session_state.anime_data = pd.DataFrame(animedataset["train"])
20
- st.session_state.anime_user_ratings = pd.DataFrame(mergeddataset["train"])
21
-
22
- # Load models only once
23
- if "models_loaded" not in st.session_state:
24
- st.session_state.models_loaded = {}
25
- # Load models
26
- st.session_state.models_loaded["cosine_similarity_model"] = hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME)
27
- st.session_state.models_loaded["item_based_knn_model_path"] = hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME)
28
- st.session_state.models_loaded["user_based_knn_model_path"] = hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME)
29
- st.session_state.models_loaded["svd_model_path"] = hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_SVD_TRAINED_MODEL_NAME)
30
-
31
- # Load the models using joblib
32
- with open(st.session_state.models_loaded["item_based_knn_model_path"], "rb") as f:
33
- st.session_state.models_loaded["item_based_knn_model"] = joblib.load(f)
34
-
35
- with open(st.session_state.models_loaded["user_based_knn_model_path"], "rb") as f:
36
- st.session_state.models_loaded["user_based_knn_model"] = joblib.load(f)
37
-
38
- with open(st.session_state.models_loaded["svd_model_path"], "rb") as f:
39
- st.session_state.models_loaded["svd_model"] = joblib.load(f)
40
-
41
- print("Models loaded successfully!")
42
-
43
- # Access the data from session state
44
- anime_data = st.session_state.anime_data
45
- anime_user_ratings = st.session_state.anime_user_ratings
46
-
47
- # # Display dataset info
48
- # st.write("Anime Data:")
49
- # st.dataframe(anime_data.head())
50
-
51
- # st.write("Anime User Ratings Data:")
52
- # st.dataframe(anime_user_ratings.head())
53
-
54
- # Access the models from session state
55
- cosine_similarity_model_path = hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME)
56
- item_based_knn_model = st.session_state.models_loaded["item_based_knn_model"]
57
- user_based_knn_model = st.session_state.models_loaded["user_based_knn_model"]
58
- svd_model = st.session_state.models_loaded["svd_model"]
59
- print("Models loaded successfully!")
60
-
61
- # Streamlit UI
62
- app_selector = st.sidebar.radio(
63
- "Select App", ("Content-Based Recommender", "Collaborative Recommender", "Top Anime Recommender")
64
- )
65
-
66
- if app_selector == "Content-Based Recommender":
67
- st.title("Content-Based Recommendation System")
68
- try:
69
-
70
- anime_list = anime_data["name"].tolist()
71
- anime_name = st.selectbox("Pick an anime..unlock similar anime recommendations..", anime_list)
72
-
73
- # Set number of recommendations
74
- max_recommendations = min(len(anime_data), 100)
75
- n_recommendations = st.slider("Number of Recommendations", 1, max_recommendations, 10)
76
-
77
- # Inject custom CSS for anime name font size
78
- st.markdown(
79
- """
80
- <style>
81
- .anime-title {
82
- font-size: 14px !important;
83
- font-weight: bold;
84
- text-align: center;
85
- margin-top: 5px;
86
- }
87
- </style>
88
- """,
89
- unsafe_allow_html=True,
90
- )
91
- # Get Recommendations
92
- if st.button("Get Recommendations"):
93
- try:
94
- recommender = ContentBasedRecommender(anime_data)
95
- recommendations = recommender.get_rec_cosine(anime_name, n_recommendations=n_recommendations,model_path=cosine_similarity_model_path)
96
-
97
- if isinstance(recommendations, str):
98
- st.warning(recommendations)
99
- elif recommendations.empty:
100
- st.warning("No recommendations found.")
101
- else:
102
- st.write(f"Here are the Content-based Recommendations for {anime_name}:")
103
- cols = st.columns(5)
104
- for i, row in enumerate(recommendations.iterrows()):
105
- col = cols[i % 5]
106
- with col:
107
- st.image(row[1]['Image URL'], use_container_width=True)
108
- st.markdown(
109
- f"<div class='anime-title'>{row[1]['Anime name']}</div>",
110
- unsafe_allow_html=True,
111
- )
112
- st.caption(f"Genres: {row[1]['Genres']} | Rating: {row[1]['Rating']}")
113
- except Exception as e:
114
- st.error(f"Unexpected error: {str(e)}")
115
-
116
- except Exception as e:
117
- st.error(f"Unexpected error: {str(e)}")
118
-
119
- elif app_selector == "Collaborative Recommender":
120
- st.title("Collaborative Recommender System")
121
-
122
- try:
123
- # Sidebar for choosing the collaborative filtering method
124
- collaborative_method = st.sidebar.selectbox(
125
- "Choose a collaborative filtering method:",
126
- ["SVD Collaborative Filtering", "User-Based Collaborative Filtering", "Anime-Based KNN Collaborative Filtering"]
127
- )
128
-
129
- # User input
130
- if collaborative_method == "SVD Collaborative Filtering" or collaborative_method == "User-Based Collaborative Filtering":
131
- user_ids = anime_user_ratings['user_id'].unique()
132
- user_id = st.selectbox("Choose a user, and we'll show you animes they'd recommend", user_ids)
133
- n_recommendations = st.slider("Number of Recommendations:", min_value=1, max_value=50, value=10)
134
- elif collaborative_method == "Anime-Based KNN Collaborative Filtering":
135
- anime_list = anime_user_ratings["name"].dropna().unique().tolist()
136
- anime_name = st.selectbox("Pick an anime, and we'll suggest more titles you'll love", anime_list)
137
- n_recommendations = st.slider("Number of Recommendations:", min_value=1, max_value=50, value=10)
138
-
139
- # Get recommendations
140
- if st.button("Get Recommendations"):
141
- # Load the recommender
142
- recommender = CollaborativeAnimeRecommender(anime_user_ratings)
143
- if collaborative_method == "SVD Collaborative Filtering":
144
- recommendations = recommender.get_svd_recommendations(user_id, n=n_recommendations, svd_model=svd_model)
145
- elif collaborative_method == "User-Based Collaborative Filtering":
146
- recommendations = recommender.get_user_based_recommendations(user_id, n_recommendations=n_recommendations, knn_user_model=user_based_knn_model)
147
- elif collaborative_method == "Anime-Based KNN Collaborative Filtering":
148
- if anime_name:
149
- recommendations = recommender.get_item_based_recommendations(anime_name, n_recommendations=n_recommendations, knn_item_model=item_based_knn_model)
150
- else:
151
- st.error("Invalid Anime Name. Please enter a valid anime title.")
152
-
153
- if isinstance(recommendations, pd.DataFrame) and not recommendations.empty:
154
- if len(recommendations) < n_recommendations:
155
- st.warning(f"Oops...Only {len(recommendations)} recommendations available, fewer than the requested {n_recommendations}.")
156
- st.write(f"Here are the Collaborative Recommendations:")
157
- cols = st.columns(5)
158
- for i, row in enumerate(recommendations.iterrows()):
159
- col = cols[i % 5]
160
- with col:
161
- st.image(row[1]['Image URL'], use_container_width=True)
162
- st.markdown(
163
- f"<div class='anime-title'>{row[1]['Anime Name']}</div>",
164
- unsafe_allow_html=True,
165
- )
166
- st.caption(f"Genres: {row[1]['Genres']} | Rating: {row[1]['Rating']}")
167
- else:
168
- st.error("No recommendations found.")
169
- except Exception as e:
170
- st.error(f"An error occurred: {e}")
171
-
172
-
173
- elif app_selector == "Top Anime Recommender":
174
- st.title("Top Anime Recommender System")
175
-
176
- try:
177
- # Sidebar for choosing the popularity-based filtering method
178
- popularity_method = st.sidebar.selectbox(
179
- "Choose a Popularity-Based Filtering method:",
180
- [
181
- "Popular Animes",
182
- "Top Ranked Animes",
183
- "Overall Top Rated Animes",
184
- "Favorite Animes",
185
- "Top Animes by Members",
186
- "Popular Anime Among Members",
187
- "Top Average Rated Animes",
188
- ]
189
- )
190
-
191
- n_recommendations = st.slider("Number of Recommendations:", min_value=1, max_value=500, value=10)
192
-
193
- if st.button("Get Recommendations"):
194
- # Load the popularity-based recommender
195
- recommender = PopularityBasedFiltering(anime_data)
196
-
197
- # Get recommendations based on selected method
198
- if popularity_method == "Popular Animes":
199
- recommendations = recommender.popular_animes(n=n_recommendations)
200
- elif popularity_method == "Top Ranked Animes":
201
- recommendations = recommender.top_ranked_animes(n=n_recommendations)
202
- elif popularity_method == "Overall Top Rated Animes":
203
- recommendations = recommender.overall_top_rated_animes(n=n_recommendations)
204
- elif popularity_method == "Favorite Animes":
205
- recommendations = recommender.favorite_animes(n=n_recommendations)
206
- elif popularity_method == "Top Animes by Members":
207
- recommendations = recommender.top_animes_members(n=n_recommendations)
208
- elif popularity_method == "Popular Anime Among Members":
209
- recommendations = recommender.popular_anime_among_members(n=n_recommendations)
210
- elif popularity_method == "Top Average Rated Animes":
211
- recommendations = recommender.top_avg_rated(n=n_recommendations)
212
- else:
213
- st.error("Invalid selection. Please choose a valid method.")
214
- recommendations = None
215
-
216
- # Display recommendations
217
- if isinstance(recommendations, pd.DataFrame) and not recommendations.empty:
218
- st.write(f"Here are the {popularity_method}:")
219
- cols = st.columns(5)
220
- for i, row in recommendations.iterrows():
221
- col = cols[i % 5]
222
- with col:
223
- st.image(row['Image URL'], use_container_width=True)
224
- st.markdown(
225
- f"<div class='anime-title'>{row['Anime name']}</div>",
226
- unsafe_allow_html=True,
227
- )
228
- st.caption(f"Genres: {row['Genres']} | Rating: {row['Rating']}")
229
- else:
230
- st.error("No recommendations found.")
231
- except Exception as e:
232
- st.error(f"An error occurred: {e}")
233
-
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ from anime_recommender.model_trainer.content_based_modelling import ContentBasedRecommender
4
+ from anime_recommender.model_trainer.collaborative_modelling import CollaborativeAnimeRecommender
5
+ from anime_recommender.model_trainer.top_anime_filtering import PopularityBasedFiltering
6
+ import joblib
7
+ from anime_recommender.constant import *
8
+ from huggingface_hub import hf_hub_download
9
+ from datasets import load_dataset
10
+
11
import html

# Must be the first Streamlit command executed on the page.
st.set_page_config(page_title="Anime Recommendation System", layout="wide")

# Number of recommendation cards laid out side by side.
CARDS_PER_ROW = 5

# Styling for the anime titles rendered under each poster. Injected once at the
# top level so that all three recommender views get it, not only the first.
st.markdown(
    """
    <style>
    .anime-title {
        font-size: 14px !important;
        font-weight: bold;
        text-align: center;
        margin-top: 5px;
    }
    </style>
    """,
    unsafe_allow_html=True,
)


def _render_recommendations(recommendations, name_column):
    """Render each recommendation row as a poster card in a fixed-width grid.

    Args:
        recommendations (pd.DataFrame): Rows with 'Image URL', 'Genres', 'Rating'
            and the column named by ``name_column``.
        name_column (str): Column holding the title to display.
    """
    cols = st.columns(CARDS_PER_ROW)
    # Use the running position, not the DataFrame index label: labels are not
    # guaranteed to be 0..n-1 after sorting or filtering, which would stack
    # cards unevenly or fail on non-integer labels.
    for position, (_, row) in enumerate(recommendations.iterrows()):
        with cols[position % CARDS_PER_ROW]:
            st.image(row['Image URL'], use_container_width=True)
            # Titles come from the dataset and are placed into raw HTML, so escape them.
            st.markdown(
                f"<div class='anime-title'>{html.escape(str(row[name_column]))}</div>",
                unsafe_allow_html=True,
            )
            st.caption(f"Genres: {row['Genres']} | Rating: {row['Rating']}")


if "anime_data" not in st.session_state or "anime_user_ratings" not in st.session_state:
    # Load datasets from Hugging Face (assuming no splits)
    animedataset = load_dataset(ANIME_FILE_PATH, split=None)
    mergeddataset = load_dataset(ANIMEUSERRATINGS_FILE_PATH, split=None)

    # Convert the dataset to Pandas DataFrame
    st.session_state.anime_data = pd.DataFrame(animedataset["train"])
    st.session_state.anime_user_ratings = pd.DataFrame(mergeddataset["train"])

# Load models only once per session
if "models_loaded" not in st.session_state:
    # Build the dict locally and publish it only when every model is loaded.
    # Otherwise a failure part-way leaves a half-filled dict in the session and
    # every later rerun fails with a KeyError instead of retrying the load.
    models = {
        "cosine_similarity_model": hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME),
        "item_based_knn_model_path": hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME),
        "user_based_knn_model_path": hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME),
        "svd_model_path": hf_hub_download(MODELS_FILEPATH, MODEL_TRAINER_SVD_TRAINED_MODEL_NAME),
    }

    # NOTE: joblib.load unpickles the downloaded files, which can execute
    # arbitrary code. The model repository must be trusted.
    for model_key in ("item_based_knn_model", "user_based_knn_model", "svd_model"):
        with open(models[f"{model_key}_path"], "rb") as f:
            models[model_key] = joblib.load(f)

    st.session_state.models_loaded = models
    print("Models loaded successfully!")

# Access the data from session state
anime_data = st.session_state.anime_data
anime_user_ratings = st.session_state.anime_user_ratings

# Access the models from session state. The cosine-similarity entry is the
# downloaded file path, reused here instead of calling the hub on every rerun.
models_loaded = st.session_state.models_loaded
cosine_similarity_model_path = models_loaded["cosine_similarity_model"]
item_based_knn_model = models_loaded["item_based_knn_model"]
user_based_knn_model = models_loaded["user_based_knn_model"]
svd_model = models_loaded["svd_model"]

# Streamlit UI
app_selector = st.sidebar.radio(
    "Select App", ("Content-Based Recommender", "Collaborative Recommender", "Top Anime Recommender")
)

if app_selector == "Content-Based Recommender":
    st.title("Content-Based Recommendation System")
    try:
        anime_list = anime_data["name"].tolist()
        anime_name = st.selectbox("Pick an anime..unlock similar anime recommendations..", anime_list)

        # Set number of recommendations; keep the default inside the slider's
        # range when the dataset has fewer than 10 rows.
        max_recommendations = min(len(anime_data), 100)
        n_recommendations = st.slider(
            "Number of Recommendations", 1, max_recommendations, min(10, max_recommendations)
        )

        # Get Recommendations
        if st.button("Get Recommendations"):
            recommender = ContentBasedRecommender(anime_data)
            recommendations = recommender.get_rec_cosine(
                anime_name, n_recommendations=n_recommendations, model_path=cosine_similarity_model_path
            )

            # The recommender may hand back a message string instead of a DataFrame.
            if isinstance(recommendations, str):
                st.warning(recommendations)
            elif recommendations.empty:
                st.warning("No recommendations found.")
            else:
                st.write(f"Here are the Content-based Recommendations for {anime_name}:")
                _render_recommendations(recommendations, 'Anime name')
    except Exception as e:
        st.error(f"Unexpected error: {str(e)}")

elif app_selector == "Collaborative Recommender":
    st.title("Collaborative Recommender System")

    try:
        # Sidebar for choosing the collaborative filtering method
        collaborative_method = st.sidebar.selectbox(
            "Choose a collaborative filtering method:",
            ["SVD Collaborative Filtering", "User-Based Collaborative Filtering", "Anime-Based KNN Collaborative Filtering"]
        )

        # User input: the two user-centric methods ask for a user, the
        # anime-based method asks for a title.
        user_id = None
        anime_name = None
        if collaborative_method in ("SVD Collaborative Filtering", "User-Based Collaborative Filtering"):
            user_ids = anime_user_ratings['user_id'].unique()
            user_id = st.selectbox("Choose a user, and we'll show you animes they'd recommend", user_ids)
        elif collaborative_method == "Anime-Based KNN Collaborative Filtering":
            anime_list = anime_user_ratings["name"].dropna().unique().tolist()
            anime_name = st.selectbox("Pick an anime, and we'll suggest more titles you'll love", anime_list)
        n_recommendations = st.slider("Number of Recommendations:", min_value=1, max_value=50, value=10)

        # Get recommendations
        if st.button("Get Recommendations"):
            anime_based = collaborative_method == "Anime-Based KNN Collaborative Filtering"
            if anime_based and not anime_name:
                # Stop here: there is nothing to look up, and falling through
                # would reference recommendations that were never computed.
                st.error("Invalid Anime Name. Please enter a valid anime title.")
            else:
                # Load the recommender
                recommender = CollaborativeAnimeRecommender(anime_user_ratings)
                if collaborative_method == "SVD Collaborative Filtering":
                    recommendations = recommender.get_svd_recommendations(user_id, n=n_recommendations, svd_model=svd_model)
                elif collaborative_method == "User-Based Collaborative Filtering":
                    recommendations = recommender.get_user_based_recommendations(user_id, n_recommendations=n_recommendations, knn_user_model=user_based_knn_model)
                else:
                    recommendations = recommender.get_item_based_recommendations(anime_name, n_recommendations=n_recommendations, knn_item_model=item_based_knn_model)

                if isinstance(recommendations, pd.DataFrame) and not recommendations.empty:
                    if len(recommendations) < n_recommendations:
                        st.warning(f"Oops...Only {len(recommendations)} recommendations available, fewer than the requested {n_recommendations}.")
                    st.write(f"Here are the Collaborative Recommendations:")
                    _render_recommendations(recommendations, 'Anime Name')
                else:
                    st.error("No recommendations found.")
    except Exception as e:
        st.error(f"An error occurred: {e}")


elif app_selector == "Top Anime Recommender":
    st.title("Top Anime Recommender System")

    try:
        # Menu label -> name of the PopularityBasedFiltering method that serves it.
        popularity_methods = {
            "Popular Animes": "popular_animes",
            "Top Ranked Animes": "top_ranked_animes",
            "Overall Top Rated Animes": "overall_top_rated_animes",
            "Favorite Animes": "favorite_animes",
            "Top Animes by Members": "top_animes_members",
            "Popular Anime Among Members": "popular_anime_among_members",
            "Top Average Rated Animes": "top_avg_rated",
        }

        # Sidebar for choosing the popularity-based filtering method
        popularity_method = st.sidebar.selectbox(
            "Choose a Popularity-Based Filtering method:",
            list(popularity_methods),
        )

        n_recommendations = st.slider("Number of Recommendations:", min_value=1, max_value=500, value=10)

        if st.button("Get Recommendations"):
            # Load the popularity-based recommender
            recommender = PopularityBasedFiltering(anime_data)

            # Get recommendations based on selected method
            method_name = popularity_methods.get(popularity_method)
            if method_name is None:
                st.error("Invalid selection. Please choose a valid method.")
                recommendations = None
            else:
                recommendations = getattr(recommender, method_name)(n=n_recommendations)

            # Display recommendations
            if isinstance(recommendations, pd.DataFrame) and not recommendations.empty:
                st.write(f"Here are the {popularity_method}:")
                _render_recommendations(recommendations, 'Anime name')
            else:
                st.error("No recommendations found.")
    except Exception as e:
        st.error(f"An error occurred: {e}")
233
+
notebooks/EDA.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/final_ARS.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -6,4 +6,9 @@ transformers
6
  huggingface_hub
7
  datasets
8
  scikit-surprise
 
 
 
 
 
9
  # -e .
 
6
  huggingface_hub
7
  datasets
8
  scikit-surprise
9
+ # wordcloud
10
+ # seaborn
11
+ # matplotlib
12
+ # squarify
13
+ # tensorflow
14
  # -e .
run_pipeline.py CHANGED
@@ -1,53 +1,53 @@
1
- import sys
2
- from anime_recommender.loggers.logging import logging
3
- from anime_recommender.exception.exception import AnimeRecommendorException
4
- from anime_recommender.source.data_ingestion import DataIngestion
5
- from anime_recommender.entity.config_entity import TrainingPipelineConfig,DataIngestionConfig,DataTransformationConfig,CollaborativeModelConfig,ContentBasedModelConfig
6
- from anime_recommender.source.data_transformation import DataTransformation
7
- from anime_recommender.source.collaborative_recommender import CollaborativeModelTrainer
8
- from anime_recommender.source.content_based_recommender import ContentBasedModelTrainer
9
- from anime_recommender.source.top_anime_recommenders import PopularityBasedRecommendor
10
-
11
-
12
- if __name__ == "__main__":
13
- try:
14
- training_pipeline_config = TrainingPipelineConfig()
15
- data_ingestion_config = DataIngestionConfig(training_pipeline_config)
16
- data_ingestion = DataIngestion(data_ingestion_config)
17
- logging.info("Initiating Data Ingestion.")
18
- data_ingestion_artifact = data_ingestion.ingest_data()
19
- logging.info(f"Data ingestion completed.")
20
- print(data_ingestion_artifact)
21
-
22
- # Data Transformation
23
- data_transformation_config = DataTransformationConfig(training_pipeline_config)
24
- data_transformation = DataTransformation(data_ingestion_artifact,data_transformation_config)
25
- logging.info("Initiating Data Transformation.")
26
- data_transformation_artifact = data_transformation.initiate_data_transformation()
27
- logging.info("Data Transformation Completed.")
28
- print(data_transformation_artifact)
29
-
30
- # Collaborative Model Training
31
- collaborative_model_trainer_config = CollaborativeModelConfig(training_pipeline_config)
32
- collaborative_model_trainer = CollaborativeModelTrainer(collaborative_model_trainer_config= collaborative_model_trainer_config,data_transformation_artifact=data_transformation_artifact)
33
- logging.info("Initiating Collaborative Model training.")
34
- collaborative_model_trainer_artifact = collaborative_model_trainer.initiate_model_trainer(model_type='user_knn')
35
- logging.info("Collaborative Model training completed.")
36
- print(collaborative_model_trainer_artifact)
37
-
38
- # Content Based Model Training
39
- content_based_model_trainer_config = ContentBasedModelConfig(training_pipeline_config)
40
- content_based_model_trainer = ContentBasedModelTrainer(content_based_model_trainer_config=content_based_model_trainer_config,data_ingestion_artifact=data_ingestion_artifact)
41
- logging.info("Initiating Content Based Model training.")
42
- content_based_model_trainer_artifact = content_based_model_trainer.initiate_model_trainer()
43
- logging.info("Content Based Model training completed.")
44
- print(content_based_model_trainer_artifact)
45
-
46
- # Popularity Based Filtering
47
- logging.info("Initiating Popularity based filtering.")
48
- filtering = PopularityBasedRecommendor(data_ingestion_artifact=data_ingestion_artifact)
49
- popularity_recommendations = filtering.initiate_model_trainer(filter_type='top_avg_rated')
50
- logging.info("Popularity based filtering completed.")
51
-
52
- except Exception as e:
53
  raise AnimeRecommendorException(e, sys)
 
1
+ import sys
2
+ from anime_recommender.loggers.logging import logging
3
+ from anime_recommender.exception.exception import AnimeRecommendorException
4
+ from anime_recommender.components.data_ingestion import DataIngestion
5
+ from anime_recommender.entity.config_entity import TrainingPipelineConfig,DataIngestionConfig,DataTransformationConfig,CollaborativeModelConfig,ContentBasedModelConfig
6
+ from anime_recommender.components.data_transformation import DataTransformation
7
+ from anime_recommender.components.collaborative_recommender import CollaborativeModelTrainer
8
+ from anime_recommender.components.content_based_recommender import ContentBasedModelTrainer
9
+ from anime_recommender.components.top_anime_recommenders import PopularityBasedRecommendor
10
+
11
+
12
if __name__ == "__main__":
    # Script entry point: runs every training stage in order. Each stage
    # consumes the configuration and/or artifacts produced by earlier stages.
    try:
        # Data Ingestion
        training_pipeline_config = TrainingPipelineConfig()
        data_ingestion_config = DataIngestionConfig(training_pipeline_config)
        data_ingestion = DataIngestion(data_ingestion_config)
        logging.info("Initiating Data Ingestion.")
        data_ingestion_artifact = data_ingestion.ingest_data()
        logging.info("Data ingestion completed.")
        print(data_ingestion_artifact)

        # Data Transformation
        data_transformation_config = DataTransformationConfig(training_pipeline_config)
        data_transformation = DataTransformation(
            data_ingestion_artifact,
            data_transformation_config,
        )
        logging.info("Initiating Data Transformation.")
        data_transformation_artifact = data_transformation.initiate_data_transformation()
        logging.info("Data Transformation Completed.")
        print(data_transformation_artifact)

        # Collaborative Model Training
        collaborative_model_trainer_config = CollaborativeModelConfig(training_pipeline_config)
        collaborative_model_trainer = CollaborativeModelTrainer(
            collaborative_model_trainer_config=collaborative_model_trainer_config,
            data_transformation_artifact=data_transformation_artifact,
        )
        logging.info("Initiating Collaborative Model training.")
        collaborative_model_trainer_artifact = collaborative_model_trainer.initiate_model_trainer(
            model_type='user_knn'
        )
        logging.info("Collaborative Model training completed.")
        print(collaborative_model_trainer_artifact)

        # Content Based Model Training (built from the ingestion artifact,
        # not from the transformation artifact)
        content_based_model_trainer_config = ContentBasedModelConfig(training_pipeline_config)
        content_based_model_trainer = ContentBasedModelTrainer(
            content_based_model_trainer_config=content_based_model_trainer_config,
            data_ingestion_artifact=data_ingestion_artifact,
        )
        logging.info("Initiating Content Based Model training.")
        content_based_model_trainer_artifact = content_based_model_trainer.initiate_model_trainer()
        logging.info("Content Based Model training completed.")
        print(content_based_model_trainer_artifact)

        # Popularity Based Filtering
        logging.info("Initiating Popularity based filtering.")
        filtering = PopularityBasedRecommendor(data_ingestion_artifact=data_ingestion_artifact)
        popularity_recommendations = filtering.initiate_model_trainer(filter_type='top_avg_rated')
        logging.info("Popularity based filtering completed.")

    except Exception as e:
        # Top-level boundary: wrap any stage failure in the project exception
        # and chain the original so its traceback is reported as the cause.
        raise AnimeRecommendorException(e, sys) from e
setup.py CHANGED
@@ -1,29 +1,29 @@
1
- from setuptools import find_packages, setup
2
- from typing import List
3
-
4
- def get_requirements() -> List[str] :
5
- """
6
- This function returns the list of requirements
7
- """
8
- requirements_lst:List[str] = []
9
- try:
10
- with open("requirements.txt", "r") as file:
11
- lines = file.readlines()
12
- for line in lines:
13
- requirement = line.strip()
14
- if requirement and requirement != "-e .":
15
- requirements_lst.append(requirement)
16
- except FileNotFoundError:
17
- print("requirements.txt file not found")
18
- return requirements_lst
19
-
20
- print(get_requirements())
21
-
22
- setup(
23
- name="AnimeRecommendationSystem",
24
- version= "0.0.1",
25
- author= "Krishnaveni Ponna",
26
- author_email= "[email protected]",
27
- packages= find_packages(),
28
- install_requires = get_requirements()
29
  )
 
1
+ from setuptools import find_packages, setup
2
+ from typing import List
3
+
4
def get_requirements() -> List[str]:
    """Return the installable requirements listed in ``requirements.txt``.

    The file is read from the current working directory. Blank lines,
    comment lines (starting with ``#``) and pip option lines (starting with
    ``-``, e.g. the editable-install marker ``-e .``) are skipped because
    they are not valid ``install_requires`` entries. A trailing inline
    comment (whitespace followed by ``#``) is stripped from a requirement.

    Returns:
        The requirement specifiers in file order; an empty list when
        ``requirements.txt`` does not exist (a notice is printed).
    """
    requirements_lst: List[str] = []
    try:
        with open("requirements.txt", "r", encoding="utf-8") as file:
            for line in file:
                requirement = line.strip()
                # Skip blanks, commented-out packages and pip options.
                if not requirement or requirement.startswith(("#", "-")):
                    continue
                # Only " #" starts a comment, so URL fragments such as
                # "...#egg=name" are left intact.
                requirement = requirement.partition(" #")[0].strip()
                requirements_lst.append(requirement)
    except FileNotFoundError:
        print("requirements.txt file not found")
    return requirements_lst
19
+
20
# Echo the parsed requirements so they are visible in the build output.
print(get_requirements())

# Package metadata handed to setuptools. get_requirements() is called again
# here, so requirements.txt is read a second time.
# NOTE(review): the author_email value looks like an e-mail obfuscation
# placeholder rather than a real address; confirm against the repository.
_package_metadata = {
    "name": "AnimeRecommendationSystem",
    "version": "0.0.1",
    "author": "Krishnaveni Ponna",
    "author_email": "[email protected]",
    "packages": find_packages(),
    "install_requires": get_requirements(),
}

setup(**_package_metadata)