Commit
·
db5542d
1
Parent(s):
7cbefa6
Training pipeline created and completed
Browse files- anime_recommender/constant/__init__.py +4 -7
- anime_recommender/pipelines/__init__.py +0 -0
- anime_recommender/pipelines/training_pipeline.py +152 -0
- app.py +51 -23
- requirements.txt +1 -1
- run_pipeline.py +1 -0
anime_recommender/constant/__init__.py
CHANGED
@@ -1,18 +1,16 @@
|
|
1 |
-
import os
|
2 |
"""
|
3 |
Defining common constant variables for training pipeline
|
4 |
"""
|
5 |
-
PIPELINE_NAME: str = "
|
6 |
ARTIFACT_DIR: str = "Artifacts"
|
7 |
ANIME_FILE_NAME: str = "Animes.csv"
|
8 |
RATING_FILE_NAME:str = "UserRatings.csv"
|
9 |
-
MERGED_FILE_NAME:str = "Anime_UserRatings.csv"
|
10 |
-
ZIP_FILE_PATH:str = 'datasets/archive.zip'
|
11 |
-
DATASETS_FILE_PATH:str = "datasets"
|
12 |
|
13 |
ANIME_FILE_PATH:str = "krishnaveni76/Animes"
|
14 |
RATING_FILE_PATH:str = "krishnaveni76/UserRatings"
|
15 |
ANIMEUSERRATINGS_FILE_PATH:str = "krishnaveni76/Anime_UserRatings"
|
|
|
16 |
|
17 |
"""
|
18 |
Data Ingestion related constant start with DATA_INGESTION VAR NAME
|
@@ -39,5 +37,4 @@ MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME: str = "userbasedknn.pkl"
|
|
39 |
|
40 |
MODEL_TRAINER_CON_TRAINED_MODEL_DIR:str = "content_based_recommenders"
|
41 |
MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME:str = "cosine_similarity.pkl"
|
42 |
-
|
43 |
-
MODEL_TRAINER_POP_TRAINED_MODEL_DIR:str = "popularity_based_recommenders"
|
|
|
|
|
1 |
"""
|
2 |
Defining common constant variables for training pipeline
|
3 |
"""
|
4 |
+
PIPELINE_NAME: str = "AnimeRecommender"
|
5 |
ARTIFACT_DIR: str = "Artifacts"
|
6 |
ANIME_FILE_NAME: str = "Animes.csv"
|
7 |
RATING_FILE_NAME:str = "UserRatings.csv"
|
8 |
+
MERGED_FILE_NAME:str = "Anime_UserRatings.csv"
|
|
|
|
|
9 |
|
10 |
ANIME_FILE_PATH:str = "krishnaveni76/Animes"
|
11 |
RATING_FILE_PATH:str = "krishnaveni76/UserRatings"
|
12 |
ANIMEUSERRATINGS_FILE_PATH:str = "krishnaveni76/Anime_UserRatings"
|
13 |
+
MODELS_FILEPATH = "krishnaveni76/anime-recommendation-models"
|
14 |
|
15 |
"""
|
16 |
Data Ingestion related constant start with DATA_INGESTION VAR NAME
|
|
|
37 |
|
38 |
MODEL_TRAINER_CON_TRAINED_MODEL_DIR:str = "content_based_recommenders"
|
39 |
MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME:str = "cosine_similarity.pkl"
|
40 |
+
|
|
anime_recommender/pipelines/__init__.py
ADDED
File without changes
|
anime_recommender/pipelines/training_pipeline.py
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
from anime_recommender.loggers.logging import logging
|
3 |
+
from anime_recommender.exception.exception import AnimeRecommendorException
|
4 |
+
|
5 |
+
from anime_recommender.source.data_ingestion import DataIngestion
|
6 |
+
from anime_recommender.source.data_transformation import DataTransformation
|
7 |
+
from anime_recommender.source.collaborative_recommender import CollaborativeModelTrainer
|
8 |
+
from anime_recommender.source.content_based_recommender import ContentBasedModelTrainer
|
9 |
+
from anime_recommender.source.top_anime_recommenders import PopularityBasedRecommendor
|
10 |
+
from anime_recommender.entity.config_entity import (
|
11 |
+
TrainingPipelineConfig,
|
12 |
+
DataIngestionConfig,
|
13 |
+
DataTransformationConfig,
|
14 |
+
CollaborativeModelConfig,
|
15 |
+
ContentBasedModelConfig,
|
16 |
+
)
|
17 |
+
from anime_recommender.entity.artifact_entity import (
|
18 |
+
DataIngestionArtifact,
|
19 |
+
DataTransformationArtifact,
|
20 |
+
CollaborativeModelArtifact,
|
21 |
+
ContentBasedModelArtifact,
|
22 |
+
)
|
23 |
+
|
24 |
+
class TrainingPipeline:
    """
    Orchestrates the entire anime recommender training pipeline, including
    data ingestion, transformation, model training, and popularity-based recommendations.

    Every stage method builds its own stage configuration from the shared
    ``TrainingPipelineConfig`` and converts any failure into an
    ``AnimeRecommendorException`` chained to the original error.
    """

    def __init__(self) -> None:
        """
        Initialize the TrainingPipeline with required configurations.
        """
        self.training_pipeline_config = TrainingPipelineConfig()

    def start_data_ingestion(self) -> DataIngestionArtifact:
        """
        Starts the data ingestion process.

        Returns:
            DataIngestionArtifact: The value returned by ``DataIngestion.ingest_data()``.

        Raises:
            AnimeRecommendorException: If any step of ingestion fails.
        """
        try:
            logging.info("Initiating Data Ingestion...")
            data_ingestion_config = DataIngestionConfig(self.training_pipeline_config)
            data_ingestion = DataIngestion(data_ingestion_config=data_ingestion_config)
            data_ingestion_artifact = data_ingestion.ingest_data()
            logging.info(f"Data Ingestion completed: {data_ingestion_artifact}")
            return data_ingestion_artifact
        except Exception as e:
            # Chain explicitly so the root cause stays attached to the wrapper.
            raise AnimeRecommendorException(e, sys) from e

    def start_data_transformation(self, data_ingestion_artifact: DataIngestionArtifact) -> DataTransformationArtifact:
        """
        Starts the data transformation process.

        Args:
            data_ingestion_artifact: Output of the data ingestion stage.

        Returns:
            DataTransformationArtifact: The value returned by
            ``DataTransformation.initiate_data_transformation()``.

        Raises:
            AnimeRecommendorException: If any step of transformation fails.
        """
        try:
            logging.info("Initiating Data Transformation...")
            data_transformation_config = DataTransformationConfig(self.training_pipeline_config)
            data_transformation = DataTransformation(
                data_ingestion_artifact=data_ingestion_artifact,
                data_transformation_config=data_transformation_config,
            )
            data_transformation_artifact = data_transformation.initiate_data_transformation()
            logging.info(f"Data Transformation completed: {data_transformation_artifact}")
            return data_transformation_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    def start_collaborative_model_training(self, data_transformation_artifact: DataTransformationArtifact) -> CollaborativeModelArtifact:
        """
        Starts collaborative filtering model training.

        The trainer is invoked with ``model_type='user_knn'``.

        Args:
            data_transformation_artifact: Output of the data transformation stage.

        Returns:
            CollaborativeModelArtifact: Trained collaborative model artifact.

        Raises:
            AnimeRecommendorException: If any step of training fails.
        """
        try:
            logging.info("Initiating Collaborative Model Training...")
            collaborative_model_config = CollaborativeModelConfig(self.training_pipeline_config)
            collaborative_model_trainer = CollaborativeModelTrainer(
                collaborative_model_trainer_config=collaborative_model_config,
                data_transformation_artifact=data_transformation_artifact,
            )
            collaborative_model_trainer_artifact = collaborative_model_trainer.initiate_model_trainer(model_type='user_knn')
            logging.info(f"Collaborative Model Training completed: {collaborative_model_trainer_artifact}")
            return collaborative_model_trainer_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    def start_content_based_model_training(self, data_ingestion_artifact: DataIngestionArtifact) -> ContentBasedModelArtifact:
        """
        Starts content-based filtering model training.

        Args:
            data_ingestion_artifact: Output of the data ingestion stage.

        Returns:
            ContentBasedModelArtifact: Trained content-based model artifact.

        Raises:
            AnimeRecommendorException: If any step of training fails.
        """
        try:
            logging.info("Initiating Content-Based Model Training...")
            content_based_model_config = ContentBasedModelConfig(self.training_pipeline_config)
            content_based_model_trainer = ContentBasedModelTrainer(
                content_based_model_trainer_config=content_based_model_config,
                data_ingestion_artifact=data_ingestion_artifact,
            )
            content_based_model_trainer_artifact = content_based_model_trainer.initiate_model_trainer()
            logging.info(f"Content-Based Model Training completed: {content_based_model_trainer_artifact}")
            return content_based_model_trainer_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    def start_popularity_based_filtering(self, data_ingestion_artifact: DataIngestionArtifact):
        """
        Generates popularity-based recommendations.

        The recommender is invoked with ``filter_type='popular_animes'``.

        Args:
            data_ingestion_artifact: Output of the data ingestion stage.

        Returns:
            Whatever ``PopularityBasedRecommendor.initiate_model_trainer`` returns
            (its concrete type is not visible from this module).

        Raises:
            AnimeRecommendorException: If filtering fails.
        """
        try:
            logging.info("Initiating Popularity-Based Filtering...")
            filtering = PopularityBasedRecommendor(data_ingestion_artifact=data_ingestion_artifact)
            recommendations = filtering.initiate_model_trainer(filter_type='popular_animes')
            logging.info("Popularity-Based Filtering completed.")
            return recommendations
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    def run_pipeline(self) -> None:
        """
        Executes the entire training pipeline.

        Stage order: ingestion, transformation, collaborative training,
        content-based training, popularity-based filtering. The last three
        stages are run for their effects; their return values are not used here.

        Raises:
            AnimeRecommendorException: If any stage fails.
        """
        try:
            # Data Ingestion
            data_ingestion_artifact = self.start_data_ingestion()

            # Data Transformation
            data_transformation_artifact = self.start_data_transformation(data_ingestion_artifact)

            # Collaborative Model Training
            self.start_collaborative_model_training(data_transformation_artifact)

            # Content-Based Model Training
            self.start_content_based_model_training(data_ingestion_artifact)

            # Popularity-Based Filtering
            self.start_popularity_based_filtering(data_ingestion_artifact)

            logging.info("Training Pipeline executed successfully.")
        except AnimeRecommendorException:
            # A stage already wrapped its failure; wrapping again would bury
            # the original error details under a second wrapper.
            raise
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e
|
144 |
+
|
145 |
+
|
146 |
+
if __name__ == "__main__":
    # Script entry point: run the full training pipeline once.
    try:
        pipeline = TrainingPipeline()
        pipeline.run_pipeline()
    except Exception as e:
        # Log before re-raising so the failure is recorded even if the
        # process exits on the exception.
        logging.error(f"Pipeline execution failed: {str(e)}")
        # Chain explicitly so the original error is kept as the cause.
        raise AnimeRecommendorException(e, sys) from e
|
app.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
-
import sys
|
2 |
import pandas as pd
|
3 |
import streamlit as st
|
4 |
-
from anime_recommender.
|
5 |
-
from anime_recommender.
|
6 |
-
from anime_recommender.
|
7 |
import joblib
|
|
|
8 |
from huggingface_hub import hf_hub_download
|
9 |
from datasets import load_dataset
|
10 |
|
@@ -12,14 +12,38 @@ st.set_page_config(page_title="Anime Recommendation System", layout="wide")
|
|
12 |
|
13 |
if "anime_data" not in st.session_state or "anime_user_ratings" not in st.session_state:
|
14 |
# Load datasets from Hugging Face (assuming no splits)
|
15 |
-
animedataset = load_dataset(
|
16 |
-
mergeddataset = load_dataset(
|
17 |
|
18 |
# Convert the dataset to Pandas DataFrame
|
19 |
st.session_state.anime_data = pd.DataFrame(animedataset["train"])
|
20 |
st.session_state.anime_user_ratings = pd.DataFrame(mergeddataset["train"])
|
21 |
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
# Access the data from session state
|
24 |
anime_data = st.session_state.anime_data
|
25 |
anime_user_ratings = st.session_state.anime_user_ratings
|
@@ -31,25 +55,30 @@ st.dataframe(anime_data)
|
|
31 |
st.write("Anime User Ratings Data:")
|
32 |
st.dataframe(anime_user_ratings)
|
33 |
|
34 |
-
# Define your repository name
|
35 |
-
repo_name = "krishnaveni76/anime-recommendation-models"
|
|
|
|
|
36 |
|
37 |
-
#
|
38 |
-
|
39 |
-
|
40 |
-
user_based_knn_model_path = hf_hub_download(repo_name, "userbasedknn.pkl")
|
41 |
-
svd_model_path = hf_hub_download(repo_name, "svd.pkl")
|
42 |
|
43 |
-
with open(item_based_knn_model_path, "rb") as f:
|
44 |
-
|
45 |
|
46 |
-
with open(user_based_knn_model_path, "rb") as f:
|
47 |
-
|
48 |
|
49 |
-
with open(svd_model_path, "rb") as f:
|
50 |
-
|
51 |
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
53 |
print("Models loaded successfully!")
|
54 |
|
55 |
# Streamlit UI
|
@@ -113,8 +142,7 @@ if app_selector == "Content-Based Recommender":
|
|
113 |
elif app_selector == "Collaborative Recommender":
|
114 |
st.title("Collaborative Recommender System")
|
115 |
|
116 |
-
try:
|
117 |
-
|
118 |
# Sidebar for choosing the collaborative filtering method
|
119 |
collaborative_method = st.sidebar.selectbox(
|
120 |
"Choose a collaborative filtering method:",
|
|
|
|
|
1 |
import pandas as pd
|
2 |
import streamlit as st
|
3 |
+
from anime_recommender.model_trainer.content_based_modelling import ContentBasedRecommender
|
4 |
+
from anime_recommender.model_trainer.collaborative_modelling import CollaborativeAnimeRecommender
|
5 |
+
from anime_recommender.model_trainer.top_anime_filtering import PopularityBasedFiltering
|
6 |
import joblib
|
7 |
+
from anime_recommender.constant import *
|
8 |
from huggingface_hub import hf_hub_download
|
9 |
from datasets import load_dataset
|
10 |
|
|
|
12 |
|
13 |
if "anime_data" not in st.session_state or "anime_user_ratings" not in st.session_state:
|
14 |
# Load datasets from Hugging Face (assuming no splits)
|
15 |
+
animedataset = load_dataset(ANIME_FILE_PATH, split=None)
|
16 |
+
mergeddataset = load_dataset(ANIMEUSERRATINGS_FILE_PATH, split=None)
|
17 |
|
18 |
# Convert the dataset to Pandas DataFrame
|
19 |
st.session_state.anime_data = pd.DataFrame(animedataset["train"])
|
20 |
st.session_state.anime_user_ratings = pd.DataFrame(mergeddataset["train"])
|
21 |
|
22 |
+
# Load models only once
|
23 |
+
if "models_loaded" not in st.session_state:
    # Hugging Face repository that hosts the trained model files.
    repo_name = MODELS_FILEPATH

    # Build the cache in a local dict and publish it to session state only
    # after every download and load has succeeded. Storing the empty dict
    # first would leave a half-filled cache behind if any step failed, and
    # later reruns would then skip this block and hit KeyError.
    models = {}

    # Download the model files; each entry holds the local file path
    # (the paths are opened with open() below).
    models["cosine_similarity_model"] = hf_hub_download(repo_name, MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME)
    models["item_based_knn_model_path"] = hf_hub_download(repo_name, MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME)
    models["user_based_knn_model_path"] = hf_hub_download(repo_name, MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME)
    models["svd_model_path"] = hf_hub_download(repo_name, MODEL_TRAINER_SVD_TRAINED_MODEL_NAME)

    # Load the models using joblib; "<name>_path" holds the file location
    # and "<name>" receives the deserialized object.
    for model_key in ("item_based_knn_model", "user_based_knn_model", "svd_model"):
        with open(models[f"{model_key}_path"], "rb") as f:
            models[model_key] = joblib.load(f)

    st.session_state.models_loaded = models
    print("Models loaded successfully!")
|
46 |
+
|
47 |
# Access the data from session state
|
48 |
anime_data = st.session_state.anime_data
|
49 |
anime_user_ratings = st.session_state.anime_user_ratings
|
|
|
55 |
st.write("Anime User Ratings Data:")
|
56 |
st.dataframe(anime_user_ratings)
|
57 |
|
58 |
+
# # Define your repository name
|
59 |
+
# repo_name = "krishnaveni76/anime-recommendation-models"
|
60 |
+
|
61 |
+
# # Load models
|
62 |
|
63 |
+
# item_based_knn_model_path = hf_hub_download(repo_name, MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME)
|
64 |
+
# user_based_knn_model_path = hf_hub_download(repo_name, MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME)
|
65 |
+
# svd_model_path = hf_hub_download(repo_name,MODEL_TRAINER_SVD_TRAINED_MODEL_NAME)
|
|
|
|
|
66 |
|
67 |
+
# with open(item_based_knn_model_path, "rb") as f:
|
68 |
+
# item_based_knn_model = joblib.load(f)
|
69 |
|
70 |
+
# with open(user_based_knn_model_path, "rb") as f:
|
71 |
+
# user_based_knn_model = joblib.load(f)
|
72 |
|
73 |
+
# with open(svd_model_path, "rb") as f:
|
74 |
+
# svd_model = joblib.load(f)
|
75 |
|
76 |
+
|
77 |
+
# Access the models from session state
|
78 |
+
# Reuse the path cached on first load: `repo_name` is only bound inside the first-run block, so calling hf_hub_download(repo_name, ...) here raises NameError on every Streamlit rerun.
cosine_similarity_model_path = st.session_state.models_loaded["cosine_similarity_model"]
|
79 |
+
item_based_knn_model = st.session_state.models_loaded["item_based_knn_model"]
|
80 |
+
user_based_knn_model = st.session_state.models_loaded["user_based_knn_model"]
|
81 |
+
svd_model = st.session_state.models_loaded["svd_model"]
|
82 |
print("Models loaded successfully!")
|
83 |
|
84 |
# Streamlit UI
|
|
|
142 |
elif app_selector == "Collaborative Recommender":
|
143 |
st.title("Collaborative Recommender System")
|
144 |
|
145 |
+
try:
|
|
|
146 |
# Sidebar for choosing the collaborative filtering method
|
147 |
collaborative_method = st.sidebar.selectbox(
|
148 |
"Choose a collaborative filtering method:",
|
requirements.txt
CHANGED
@@ -6,4 +6,4 @@ transformers
|
|
6 |
huggingface_hub
|
7 |
datasets
|
8 |
scikit-surprise
|
9 |
-
-e .
|
|
|
6 |
huggingface_hub
|
7 |
datasets
|
8 |
scikit-surprise
|
9 |
+
# -e .
|
run_pipeline.py
CHANGED
@@ -8,6 +8,7 @@ from anime_recommender.source.collaborative_recommender import CollaborativeMode
|
|
8 |
from anime_recommender.source.content_based_recommender import ContentBasedModelTrainer
|
9 |
from anime_recommender.source.top_anime_recommenders import PopularityBasedRecommendor
|
10 |
|
|
|
11 |
if __name__ == "__main__":
|
12 |
try:
|
13 |
training_pipeline_config = TrainingPipelineConfig()
|
|
|
8 |
from anime_recommender.source.content_based_recommender import ContentBasedModelTrainer
|
9 |
from anime_recommender.source.top_anime_recommenders import PopularityBasedRecommendor
|
10 |
|
11 |
+
|
12 |
if __name__ == "__main__":
|
13 |
try:
|
14 |
training_pipeline_config = TrainingPipelineConfig()
|