krishnaveni76 committed on
Commit
db5542d
·
1 Parent(s): 7cbefa6

Training pipeline created and completed

Browse files
anime_recommender/constant/__init__.py CHANGED
@@ -1,18 +1,16 @@
1
- import os
2
  """
3
  Defining common constant variables for training pipeline
4
  """
5
- PIPELINE_NAME: str = "AnimeRecommendor"
6
  ARTIFACT_DIR: str = "Artifacts"
7
  ANIME_FILE_NAME: str = "Animes.csv"
8
  RATING_FILE_NAME:str = "UserRatings.csv"
9
- MERGED_FILE_NAME:str = "Anime_UserRatings.csv"
10
- ZIP_FILE_PATH:str = 'datasets/archive.zip'
11
- DATASETS_FILE_PATH:str = "datasets"
12
 
13
  ANIME_FILE_PATH:str = "krishnaveni76/Animes"
14
  RATING_FILE_PATH:str = "krishnaveni76/UserRatings"
15
  ANIMEUSERRATINGS_FILE_PATH:str = "krishnaveni76/Anime_UserRatings"
 
16
 
17
  """
18
  Data Ingestion related constant start with DATA_INGESTION VAR NAME
@@ -39,5 +37,4 @@ MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME: str = "userbasedknn.pkl"
39
 
40
  MODEL_TRAINER_CON_TRAINED_MODEL_DIR:str = "content_based_recommenders"
41
  MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME:str = "cosine_similarity.pkl"
42
-
43
- MODEL_TRAINER_POP_TRAINED_MODEL_DIR:str = "popularity_based_recommenders"
 
 
1
  """
2
  Defining common constant variables for training pipeline
3
  """
4
+ PIPELINE_NAME: str = "AnimeRecommender"
5
  ARTIFACT_DIR: str = "Artifacts"
6
  ANIME_FILE_NAME: str = "Animes.csv"
7
  RATING_FILE_NAME:str = "UserRatings.csv"
8
+ MERGED_FILE_NAME:str = "Anime_UserRatings.csv"
 
 
9
 
10
  ANIME_FILE_PATH:str = "krishnaveni76/Animes"
11
  RATING_FILE_PATH:str = "krishnaveni76/UserRatings"
12
  ANIMEUSERRATINGS_FILE_PATH:str = "krishnaveni76/Anime_UserRatings"
13
+ MODELS_FILEPATH = "krishnaveni76/anime-recommendation-models"
14
 
15
  """
16
  Data Ingestion related constant start with DATA_INGESTION VAR NAME
 
37
 
38
  MODEL_TRAINER_CON_TRAINED_MODEL_DIR:str = "content_based_recommenders"
39
  MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME:str = "cosine_similarity.pkl"
40
+
 
anime_recommender/pipelines/__init__.py ADDED
File without changes
anime_recommender/pipelines/training_pipeline.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from anime_recommender.loggers.logging import logging
3
+ from anime_recommender.exception.exception import AnimeRecommendorException
4
+
5
+ from anime_recommender.source.data_ingestion import DataIngestion
6
+ from anime_recommender.source.data_transformation import DataTransformation
7
+ from anime_recommender.source.collaborative_recommender import CollaborativeModelTrainer
8
+ from anime_recommender.source.content_based_recommender import ContentBasedModelTrainer
9
+ from anime_recommender.source.top_anime_recommenders import PopularityBasedRecommendor
10
+ from anime_recommender.entity.config_entity import (
11
+ TrainingPipelineConfig,
12
+ DataIngestionConfig,
13
+ DataTransformationConfig,
14
+ CollaborativeModelConfig,
15
+ ContentBasedModelConfig,
16
+ )
17
+ from anime_recommender.entity.artifact_entity import (
18
+ DataIngestionArtifact,
19
+ DataTransformationArtifact,
20
+ CollaborativeModelArtifact,
21
+ ContentBasedModelArtifact,
22
+ )
23
+
24
+ class TrainingPipeline:
25
+ """
26
+ Orchestrates the entire anime recommender training pipeline, including
27
+ data ingestion, transformation, model training, and popularity-based recommendations.
28
+ """
29
+ def __init__(self):
30
+ """
31
+ Initialize the TrainingPipeline with required configurations.
32
+ """
33
+ self.training_pipeline_config = TrainingPipelineConfig()
34
+
35
+ def start_data_ingestion(self) -> DataIngestionArtifact:
36
+ """
37
+ Starts the data ingestion process.
38
+ Returns:
39
+ DataIngestionArtifact: Contains information about ingested data.
40
+ """
41
+ try:
42
+ logging.info("Initiating Data Ingestion...")
43
+ data_ingestion_config = DataIngestionConfig(self.training_pipeline_config)
44
+ data_ingestion = DataIngestion(data_ingestion_config=data_ingestion_config)
45
+ data_ingestion_artifact = data_ingestion.ingest_data()
46
+ logging.info(f"Data Ingestion completed: {data_ingestion_artifact}")
47
+ return data_ingestion_artifact
48
+ except Exception as e:
49
+ raise AnimeRecommendorException(e, sys)
50
+
51
+ def start_data_transformation(self, data_ingestion_artifact: DataIngestionArtifact) -> DataTransformationArtifact:
52
+ """
53
+ Starts the data transformation process.
54
+ Returns:
55
+ DataTransformationArtifact: Contains transformed data.
56
+ """
57
+ try:
58
+ logging.info("Initiating Data Transformation...")
59
+ data_transformation_config = DataTransformationConfig(self.training_pipeline_config)
60
+ data_transformation = DataTransformation(
61
+ data_ingestion_artifact=data_ingestion_artifact,
62
+ data_transformation_config=data_transformation_config
63
+ )
64
+ data_transformation_artifact = data_transformation.initiate_data_transformation()
65
+ logging.info(f"Data Transformation completed: {data_transformation_artifact}")
66
+ return data_transformation_artifact
67
+ except Exception as e:
68
+ raise AnimeRecommendorException(e, sys)
69
+
70
+ def start_collaborative_model_training(self, data_transformation_artifact: DataTransformationArtifact) -> CollaborativeModelArtifact:
71
+ """
72
+ Starts collaborative filtering model training.
73
+ Returns:
74
+ CollaborativeModelTrainerArtifact: Trained collaborative model artifact.
75
+ """
76
+ try:
77
+ logging.info("Initiating Collaborative Model Training...")
78
+ collaborative_model_config = CollaborativeModelConfig(self.training_pipeline_config)
79
+ collaborative_model_trainer = CollaborativeModelTrainer(
80
+ collaborative_model_trainer_config=collaborative_model_config,
81
+ data_transformation_artifact=data_transformation_artifact
82
+ )
83
+ collaborative_model_trainer_artifact = collaborative_model_trainer.initiate_model_trainer(model_type='user_knn')
84
+ logging.info(f"Collaborative Model Training completed: {collaborative_model_trainer_artifact}")
85
+ return collaborative_model_trainer_artifact
86
+ except Exception as e:
87
+ raise AnimeRecommendorException(e, sys)
88
+
89
+ def start_content_based_model_training(self, data_ingestion_artifact: DataIngestionArtifact) -> ContentBasedModelArtifact:
90
+ """
91
+ Starts content-based filtering model training.
92
+ Returns:
93
+ ContentBasedModelTrainerArtifact: Trained content-based model artifact.
94
+ """
95
+ try:
96
+ logging.info("Initiating Content-Based Model Training...")
97
+ content_based_model_config = ContentBasedModelConfig(self.training_pipeline_config)
98
+ content_based_model_trainer = ContentBasedModelTrainer(
99
+ content_based_model_trainer_config=content_based_model_config,
100
+ data_ingestion_artifact=data_ingestion_artifact
101
+ )
102
+ content_based_model_trainer_artifact = content_based_model_trainer.initiate_model_trainer()
103
+ logging.info(f"Content-Based Model Training completed: {content_based_model_trainer_artifact}")
104
+ return content_based_model_trainer_artifact
105
+ except Exception as e:
106
+ raise AnimeRecommendorException(e, sys)
107
+
108
+ def start_popularity_based_filtering(self, data_ingestion_artifact: DataIngestionArtifact):
109
+ """
110
+ Generates popularity-based recommendations.
111
+ """
112
+ try:
113
+ logging.info("Initiating Popularity-Based Filtering...")
114
+ filtering = PopularityBasedRecommendor(data_ingestion_artifact=data_ingestion_artifact)
115
+ recommendations = filtering.initiate_model_trainer(filter_type='popular_animes')
116
+ logging.info("Popularity-Based Filtering completed.")
117
+ return recommendations
118
+ except Exception as e:
119
+ raise AnimeRecommendorException(e, sys)
120
+
121
+ def run_pipeline(self):
122
+ """
123
+ Executes the entire training pipeline.
124
+ """
125
+ try:
126
+ # Data Ingestion
127
+ data_ingestion_artifact = self.start_data_ingestion()
128
+
129
+ # Data Transformation
130
+ data_transformation_artifact = self.start_data_transformation(data_ingestion_artifact)
131
+
132
+ # Collaborative Model Training
133
+ collaborative_model_trainer_artifact = self.start_collaborative_model_training(data_transformation_artifact)
134
+
135
+ # Content-Based Model Training
136
+ content_based_model_trainer_artifact = self.start_content_based_model_training(data_ingestion_artifact)
137
+
138
+ # Popularity-Based Filtering
139
+ popularity_recommendations = self.start_popularity_based_filtering(data_ingestion_artifact)
140
+
141
+ logging.info("Training Pipeline executed successfully.")
142
+ except Exception as e:
143
+ raise AnimeRecommendorException(e, sys)
144
+
145
+
146
+ if __name__ == "__main__":
147
+ try:
148
+ pipeline = TrainingPipeline()
149
+ pipeline.run_pipeline()
150
+ except Exception as e:
151
+ logging.error(f"Pipeline execution failed: {str(e)}")
152
+ raise AnimeRecommendorException(e, sys)
app.py CHANGED
@@ -1,10 +1,10 @@
1
- import sys
2
  import pandas as pd
3
  import streamlit as st
4
- from anime_recommender.content_filtering_models import ContentBasedRecommender
5
- from anime_recommender.collaborative_filtering_models import CollaborativeAnimeRecommender
6
- from anime_recommender.popularity_based_filtering import PopularityBasedFiltering
7
  import joblib
 
8
  from huggingface_hub import hf_hub_download
9
  from datasets import load_dataset
10
 
@@ -12,14 +12,38 @@ st.set_page_config(page_title="Anime Recommendation System", layout="wide")
12
 
13
  if "anime_data" not in st.session_state or "anime_user_ratings" not in st.session_state:
14
  # Load datasets from Hugging Face (assuming no splits)
15
- animedataset = load_dataset("krishnaveni76/Animes", split=None)
16
- mergeddataset = load_dataset("krishnaveni76/Anime_UserRatings", split=None)
17
 
18
  # Convert the dataset to Pandas DataFrame
19
  st.session_state.anime_data = pd.DataFrame(animedataset["train"])
20
  st.session_state.anime_user_ratings = pd.DataFrame(mergeddataset["train"])
21
 
22
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  # Access the data from session state
24
  anime_data = st.session_state.anime_data
25
  anime_user_ratings = st.session_state.anime_user_ratings
@@ -31,25 +55,30 @@ st.dataframe(anime_data)
31
  st.write("Anime User Ratings Data:")
32
  st.dataframe(anime_user_ratings)
33
 
34
- # Define your repository name
35
- repo_name = "krishnaveni76/anime-recommendation-models"
 
 
36
 
37
- # Load models
38
- cosine_similarity_model_path = hf_hub_download(repo_name, "cosine_similarity.pkl")
39
- item_based_knn_model_path = hf_hub_download(repo_name, "itembasedknn.pkl")
40
- user_based_knn_model_path = hf_hub_download(repo_name, "userbasedknn.pkl")
41
- svd_model_path = hf_hub_download(repo_name, "svd.pkl")
42
 
43
- with open(item_based_knn_model_path, "rb") as f:
44
- item_based_knn_model = joblib.load(f)
45
 
46
- with open(user_based_knn_model_path, "rb") as f:
47
- user_based_knn_model = joblib.load(f)
48
 
49
- with open(svd_model_path, "rb") as f:
50
- svd_model = joblib.load(f)
51
 
52
- # Now you can use these models for recommendations
 
 
 
 
 
53
  print("Models loaded successfully!")
54
 
55
  # Streamlit UI
@@ -113,8 +142,7 @@ if app_selector == "Content-Based Recommender":
113
  elif app_selector == "Collaborative Recommender":
114
  st.title("Collaborative Recommender System")
115
 
116
- try:
117
-
118
  # Sidebar for choosing the collaborative filtering method
119
  collaborative_method = st.sidebar.selectbox(
120
  "Choose a collaborative filtering method:",
 
 
1
  import pandas as pd
2
  import streamlit as st
3
+ from anime_recommender.model_trainer.content_based_modelling import ContentBasedRecommender
4
+ from anime_recommender.model_trainer.collaborative_modelling import CollaborativeAnimeRecommender
5
+ from anime_recommender.model_trainer.top_anime_filtering import PopularityBasedFiltering
6
  import joblib
7
+ from anime_recommender.constant import *
8
  from huggingface_hub import hf_hub_download
9
  from datasets import load_dataset
10
 
 
12
 
13
  if "anime_data" not in st.session_state or "anime_user_ratings" not in st.session_state:
14
  # Load datasets from Hugging Face (assuming no splits)
15
+ animedataset = load_dataset(ANIME_FILE_PATH, split=None)
16
+ mergeddataset = load_dataset(ANIMEUSERRATINGS_FILE_PATH, split=None)
17
 
18
  # Convert the dataset to Pandas DataFrame
19
  st.session_state.anime_data = pd.DataFrame(animedataset["train"])
20
  st.session_state.anime_user_ratings = pd.DataFrame(mergeddataset["train"])
21
 
22
+ # Load models only once
23
+ if "models_loaded" not in st.session_state:
24
+ st.session_state.models_loaded = {}
25
+
26
+ # Define your repository name
27
+ repo_name = MODELS_FILEPATH
28
+
29
+ # Load models
30
+ st.session_state.models_loaded["cosine_similarity_model"] = hf_hub_download(repo_name, MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME)
31
+ st.session_state.models_loaded["item_based_knn_model_path"] = hf_hub_download(repo_name, MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME)
32
+ st.session_state.models_loaded["user_based_knn_model_path"] = hf_hub_download(repo_name, MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME)
33
+ st.session_state.models_loaded["svd_model_path"] = hf_hub_download(repo_name, MODEL_TRAINER_SVD_TRAINED_MODEL_NAME)
34
+
35
+ # Load the models using joblib
36
+ with open(st.session_state.models_loaded["item_based_knn_model_path"], "rb") as f:
37
+ st.session_state.models_loaded["item_based_knn_model"] = joblib.load(f)
38
+
39
+ with open(st.session_state.models_loaded["user_based_knn_model_path"], "rb") as f:
40
+ st.session_state.models_loaded["user_based_knn_model"] = joblib.load(f)
41
+
42
+ with open(st.session_state.models_loaded["svd_model_path"], "rb") as f:
43
+ st.session_state.models_loaded["svd_model"] = joblib.load(f)
44
+
45
+ print("Models loaded successfully!")
46
+
47
  # Access the data from session state
48
  anime_data = st.session_state.anime_data
49
  anime_user_ratings = st.session_state.anime_user_ratings
 
55
  st.write("Anime User Ratings Data:")
56
  st.dataframe(anime_user_ratings)
57
 
58
+ # # Define your repository name
59
+ # repo_name = "krishnaveni76/anime-recommendation-models"
60
+
61
+ # # Load models
62
 
63
+ # item_based_knn_model_path = hf_hub_download(repo_name, MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME)
64
+ # user_based_knn_model_path = hf_hub_download(repo_name, MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME)
65
+ # svd_model_path = hf_hub_download(repo_name,MODEL_TRAINER_SVD_TRAINED_MODEL_NAME)
 
 
66
 
67
+ # with open(item_based_knn_model_path, "rb") as f:
68
+ # item_based_knn_model = joblib.load(f)
69
 
70
+ # with open(user_based_knn_model_path, "rb") as f:
71
+ # user_based_knn_model = joblib.load(f)
72
 
73
+ # with open(svd_model_path, "rb") as f:
74
+ # svd_model = joblib.load(f)
75
 
76
+
77
+ # Access the models from session state
78
+ cosine_similarity_model_path = hf_hub_download(repo_name, MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME)
79
+ item_based_knn_model = st.session_state.models_loaded["item_based_knn_model"]
80
+ user_based_knn_model = st.session_state.models_loaded["user_based_knn_model"]
81
+ svd_model = st.session_state.models_loaded["svd_model"]
82
  print("Models loaded successfully!")
83
 
84
  # Streamlit UI
 
142
  elif app_selector == "Collaborative Recommender":
143
  st.title("Collaborative Recommender System")
144
 
145
+ try:
 
146
  # Sidebar for choosing the collaborative filtering method
147
  collaborative_method = st.sidebar.selectbox(
148
  "Choose a collaborative filtering method:",
requirements.txt CHANGED
@@ -6,4 +6,4 @@ transformers
6
  huggingface_hub
7
  datasets
8
  scikit-surprise
9
- -e .
 
6
  huggingface_hub
7
  datasets
8
  scikit-surprise
9
+ # -e .
run_pipeline.py CHANGED
@@ -8,6 +8,7 @@ from anime_recommender.source.collaborative_recommender import CollaborativeMode
8
  from anime_recommender.source.content_based_recommender import ContentBasedModelTrainer
9
  from anime_recommender.source.top_anime_recommenders import PopularityBasedRecommendor
10
 
 
11
  if __name__ == "__main__":
12
  try:
13
  training_pipeline_config = TrainingPipelineConfig()
 
8
  from anime_recommender.source.content_based_recommender import ContentBasedModelTrainer
9
  from anime_recommender.source.top_anime_recommenders import PopularityBasedRecommendor
10
 
11
+
12
  if __name__ == "__main__":
13
  try:
14
  training_pipeline_config = TrainingPipelineConfig()