Spaces:

krishnaveni76
/

Anime-Recommendation-System

Running

App Files Files Community

krishnaveni76 committed on Jan 27

Commit

660bb11

1 Parent(s): a95ae8b

Data ingestion completed

Browse files

Files changed (28) hide show

.gitignore +3 -1
anime_recommender/__pycache__/__init__.cpython-310.pyc +0 -0
anime_recommender/constant/__init__.py +43 -0
anime_recommender/constant/__pycache__/__init__.cpython-310.pyc +0 -0
anime_recommender/{constants → entity}/__init__.py +0 -0
anime_recommender/entity/__pycache__/__init__.cpython-310.pyc +0 -0
anime_recommender/entity/__pycache__/artifact_entity.cpython-310.pyc +0 -0
anime_recommender/entity/__pycache__/config_entity.cpython-310.pyc +0 -0
anime_recommender/entity/artifact_entity.py +7 -0
anime_recommender/entity/config_entity.py +31 -0
anime_recommender/exception/__init__.py +0 -0
anime_recommender/exception/__pycache__/__init__.cpython-310.pyc +0 -0
anime_recommender/exception/__pycache__/exception.cpython-310.pyc +0 -0
anime_recommender/exception/exception.py +44 -0
anime_recommender/loggers/__init__.py +0 -0
anime_recommender/loggers/__pycache__/__init__.cpython-310.pyc +0 -0
anime_recommender/loggers/__pycache__/logging.cpython-310.pyc +0 -0
anime_recommender/loggers/logging.py +16 -0
anime_recommender/source/__pycache__/__init__.cpython-310.pyc +0 -0
anime_recommender/source/__pycache__/data_ingestion.cpython-310.pyc +0 -0
anime_recommender/source/data_ingestion.py +58 -0
anime_recommender/utils/__init__.py +0 -0
anime_recommender/utils/__pycache__/__init__.cpython-310.pyc +0 -0
anime_recommender/utils/main_utils/__init__.py +0 -0
anime_recommender/utils/main_utils/__pycache__/__init__.cpython-310.pyc +0 -0
anime_recommender/utils/main_utils/__pycache__/utils.cpython-310.pyc +0 -0
anime_recommender/utils/main_utils/utils.py +47 -0
run_pipeline.py +53 -0

.gitignore CHANGED Viewed

@@ -1,2 +1,4 @@
 ars/
-.env

 ars/
+.env
+Artifacts/
+logs/

anime_recommender/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (218 Bytes). View file

anime_recommender/constant/__init__.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import os
+"""
+Defining common constant variables for training pipeline
+"""
+PIPELINE_NAME: str = "AnimeRecommendor"
+ARTIFACT_DIR: str = "Artifacts"
+ANIME_FILE_NAME: str = "Animes.csv"
+RATING_FILE_NAME:str = "UserRatings.csv"
+MERGED_FILE_NAME:str = "Anime_UserRatings.csv"
+ZIP_FILE_PATH:str = 'datasets/archive.zip'
+DATASETS_FILE_PATH:str = "datasets"
+ANIME_FILE_PATH:str = "krishnaveni76/Animes"
+RATING_FILE_PATH:str = "krishnaveni76/UserRatings"
+ANIMEUSERRATINGS_FILE_PATH:str = "krishnaveni76/Anime_UserRatings"
+"""
+Data Ingestion related constant start with DATA_INGESTION VAR NAME
+"""
+DATA_INGESTION_DIR_NAME: str = "data_ingestion"
+DATA_INGESTION_FEATURE_STORE_DIR: str = "feature_store"
+DATA_INGESTION_INGESTED_DIR: str = "ingested"
+"""
+Data Transformation related constant start with DATA_VALIDATION VAR NAME
+"""
+DATA_TRANSFORMATION_DIR:str = "data_transformation"
+DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR:str = "transformed"
+"""
+Model Trainer related constant start with MODEL TRAINER VAR NAME
+"""
+MODEL_TRAINER_DIR_NAME: str = "trained_models"
+MODEL_TRAINER_COL_TRAINED_MODEL_DIR: str = "collaborative_recommenders"
+MODEL_TRAINER_SVD_TRAINED_MODEL_NAME: str = "svd.pkl"
+MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME: str = "itembasedknn.pkl"
+MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME: str = "userbasedknn.pkl"
+MODEL_TRAINER_CON_TRAINED_MODEL_DIR:str = "content_based_recommenders"
+MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME:str = "cosine_similarity.pkl"
+MODEL_TRAINER_POP_TRAINED_MODEL_DIR:str = "popularity_based_recommenders"

anime_recommender/constant/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (1.8 kB). View file

anime_recommender/{constants → entity}/__init__.py RENAMED Viewed

File without changes

anime_recommender/entity/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (225 Bytes). View file

anime_recommender/entity/__pycache__/artifact_entity.cpython-310.pyc ADDED Viewed

Binary file (601 Bytes). View file

anime_recommender/entity/__pycache__/config_entity.cpython-310.pyc ADDED Viewed

Binary file (1.87 kB). View file

anime_recommender/entity/artifact_entity.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from dataclasses import dataclass
+from typing import Optional
+@dataclass
+class DataIngestionArtifact:
+    feature_store_anime_file_path:str
+    feature_store_userrating_file_path:str

anime_recommender/entity/config_entity.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import os
+from datetime import datetime
+from anime_recommender.constant import *
+class TrainingPipelineConfig:
+    """
+    Configuration for the training pipeline, including artifact directory and timestamp.
+    """
+    def __init__(self, timestamp=datetime.now()):
+        """
+        Initialize the configuration with a unique timestamp.
+        """
+        timestamp = timestamp.strftime("%m_%d_%Y_%H_%M_%S")
+        self.pipeline_name = PIPELINE_NAME
+        self.artifact_dir = os.path.join(ARTIFACT_DIR, timestamp)
+        self.model_dir=os.path.join("final_model")
+        self.timestamp: str = timestamp
+class DataIngestionConfig:
+    """
+    Configuration for data ingestion, including paths for feature store, train, test, and validation files.
+    """
+    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
+        """
+        Initialize data ingestion paths and parameters.
+        """
+        self.data_ingestion_dir: str = os.path.join(training_pipeline_config.artifact_dir, DATA_INGESTION_DIR_NAME)
+        self.feature_store_anime_file_path: str = os.path.join(self.data_ingestion_dir, DATA_INGESTION_FEATURE_STORE_DIR, ANIME_FILE_NAME)
+        self.feature_store_userrating_file_path: str = os.path.join(self.data_ingestion_dir, DATA_INGESTION_FEATURE_STORE_DIR, RATING_FILE_NAME)
+        self.anime_filepath: str = ANIME_FILE_PATH
+        self.rating_filepath: str = RATING_FILE_PATH

anime_recommender/exception/__init__.py ADDED Viewed

File without changes

anime_recommender/exception/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (228 Bytes). View file

anime_recommender/exception/__pycache__/exception.cpython-310.pyc ADDED Viewed

Binary file (2.1 kB). View file

anime_recommender/exception/exception.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import sys
+class AnimeRecommendorException(Exception):
+    """
+    Custom exception class for handling errors in the Energy Generation Prediction project.
+    This class captures the error message, file name, and line number where an exception occurred.
+    It is useful for debugging and identifying the source of the error in a structured way.
+    """
+    def __init__(self,error_message, error_details:sys):
+        """
+        Initialize the EnergyGenerationException instance.
+        Args:
+            error_message (str): The error message describing the exception.
+            error_details (sys): The sys module, used to extract exception details.
+        Attributes:
+            error_message (str): Stores the original error message.
+            lineno (int): The line number where the exception occurred.
+            file_name (str): The file name where the exception occurred.
+        """
+        self.error_message = error_message
+        _,_,exc_tb = error_details.exc_info()
+        self.lineno = exc_tb.tb_lineno
+        self.file_name = exc_tb.tb_frame.f_code.co_filename
+    def __str__(self):
+        """
+        Return the formatted error message.
+        Returns:
+            str: A string containing the file name, line number, and error message.
+        """
+        return "Error occured in python script name [{0}] line number [{1}] error message [{2}]".format(
+            self.file_name,self.lineno, str(self.error_message))
+if __name__=="__main__":
+    try:
+        a = 1/0  # This example will raise a ZeroDivisionError
+        print("This will not be printed",a)
+    except Exception as e:
+        raise AnimeRecommendorException(e,sys)

anime_recommender/loggers/__init__.py ADDED Viewed

File without changes

anime_recommender/loggers/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (226 Bytes). View file

anime_recommender/loggers/__pycache__/logging.cpython-310.pyc ADDED Viewed

Binary file (630 Bytes). View file

anime_recommender/loggers/logging.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import os
+import logging
+from datetime import datetime
+LOGS_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"
+logs_dir = os.path.join(os.getcwd(), "logs")
+os.makedirs(logs_dir, exist_ok=True)
+LOGS_FILE_PATH = os.path.join(logs_dir,LOGS_FILE)
+logging.basicConfig(
+    filename= LOGS_FILE_PATH,
+    format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
+    level= logging.INFO,
+)

anime_recommender/source/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (225 Bytes). View file

anime_recommender/source/__pycache__/data_ingestion.cpython-310.pyc ADDED Viewed

Binary file (2.37 kB). View file

anime_recommender/source/data_ingestion.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import os
+import sys
+import pandas as pd
+from datasets import load_dataset
+from anime_recommender.loggers.logging import logging
+from anime_recommender.exception.exception import AnimeRecommendorException
+from anime_recommender.entity.config_entity import DataIngestionConfig
+from anime_recommender.entity.artifact_entity import DataIngestionArtifact
+from anime_recommender.utils.main_utils.utils import export_data_to_dataframe
+class DataIngestion:
+    def __init__(self, data_ingestion_config: DataIngestionConfig):
+        try:
+            self.data_ingestion_config = data_ingestion_config
+        except Exception as e:
+            raise AnimeRecommendorException(e, sys)
+    def fetch_data_from_huggingface(self, dataset_path: str, split: str = None) -> pd.DataFrame:
+        try:
+            logging.info(f"Fetching data from Hugging Face dataset: {dataset_path}")
+            # Load dataset from Hugging Face
+            dataset = load_dataset(dataset_path, split=split)
+            # Convert dataset to pandas DataFrame
+            df = pd.DataFrame(dataset['train'])
+            # Log some information about the data
+            logging.info(f"Shape of the dataframe: {df.shape}")
+            logging.info(f"Column names: {df.columns}")
+            logging.info(f"Preview of the DataFrame:\n{df.head()}")
+            logging.info("Data fetched successfully from Hugging Face.")
+            return df
+        except Exception as e:
+            logging.error(f"An error occurred while fetching data: {str(e)}")
+            raise AnimeRecommendorException(e, sys)
+    def ingest_data(self) -> DataIngestionArtifact:
+        try:
+            # Load anime and rating data from Hugging Face datasets
+            anime_df = self.fetch_data_from_huggingface(self.data_ingestion_config.anime_filepath)
+            rating_df = self.fetch_data_from_huggingface(self.data_ingestion_config.rating_filepath)
+            # Export data to DataFrame
+            export_data_to_dataframe(anime_df, file_path=self.data_ingestion_config.feature_store_anime_file_path)
+            export_data_to_dataframe(rating_df, file_path=self.data_ingestion_config.feature_store_userrating_file_path)
+            # Create artifact to store data ingestion info
+            dataingestionartifact = DataIngestionArtifact(
+                feature_store_anime_file_path=self.data_ingestion_config.feature_store_anime_file_path,
+                feature_store_userrating_file_path=self.data_ingestion_config.feature_store_userrating_file_path
+            )
+            return dataingestionartifact
+        except Exception as e:
+            raise AnimeRecommendorException(e, sys)

anime_recommender/utils/__init__.py ADDED Viewed

File without changes

anime_recommender/utils/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (224 Bytes). View file

anime_recommender/utils/main_utils/__init__.py ADDED Viewed

File without changes

anime_recommender/utils/main_utils/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (235 Bytes). View file

anime_recommender/utils/main_utils/__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (2.06 kB). View file

anime_recommender/utils/main_utils/utils.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import os
+import sys
+import time
+import pandas as pd
+import joblib
+from anime_recommender.loggers.logging import logging
+from anime_recommender.exception.exception import AnimeRecommendorException
+from anime_recommender.constant import *
+def export_data_to_dataframe(dataframe: pd.DataFrame, file_path: str) -> pd.DataFrame:
+        try:
+            logging.info(f"Saving DataFrame to file: {file_path}")
+            dir_path = os.path.dirname(file_path)
+            os.makedirs(dir_path, exist_ok=True)
+            dataframe.to_csv(file_path, index=False, header=True)
+            logging.info(f"DataFrame saved successfully to {file_path}.")
+            return dataframe
+        except Exception as e:
+            raise AnimeRecommendorException(e, sys)
+def load_csv_data(file_path: str) -> pd.DataFrame:
+    try:
+        df = pd.read_csv(file_path)
+        return df
+    except Exception as e:
+        raise AnimeRecommendorException(e, sys) from e
+def save_model(model: object,file_path: str ) -> None:
+    try:
+        logging.info("Entered the save_model method of Main utils class")
+        os.makedirs(os.path.dirname(file_path), exist_ok=True)
+        with open(file_path, "wb") as file_obj:
+            joblib.dump(model, file_obj)
+        logging.info("Completed saving the model object.")
+    except Exception as e:
+        raise AnimeRecommendorException(e, sys) from e
+def load_object(file_path:str)-> object:
+    try:
+        if not os.path.exists(file_path):
+            raise Exception(f"The file: {file_path} is not exists")
+        with open(file_path,"rb") as file_obj:
+            print(file_obj)
+            return joblib.load(file_obj)
+    except Exception as e:
+        raise AnimeRecommendorException(e,sys) from e

run_pipeline.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import sys
+from anime_recommender.loggers.logging import logging
+from anime_recommender.exception.exception import AnimeRecommendorException
+from anime_recommender.source.data_ingestion import DataIngestion
+from anime_recommender.entity.config_entity import TrainingPipelineConfig,DataIngestionConfig
+# ,DataTransformationConfig,CollaborativeModelConfig,ContentBasedModelConfig
+# from anime_recommender.source.data_transformation import DataTransformation
+# from anime_recommender.source.collaborative_recommenders import CollaborativeModelTrainer
+# from anime_recommender.source.content_based_recommenders import ContentBasedModelTrainer
+# from anime_recommender.source.popularity_based_recommenders import PopularityBasedRecommendor
+if __name__ == "__main__":
+    try:
+        training_pipeline_config = TrainingPipelineConfig()
+        data_ingestion_config = DataIngestionConfig(training_pipeline_config)
+        data_ingestion = DataIngestion(data_ingestion_config)
+        logging.info("Initiating Data Ingestion.")
+        data_ingestion_artifact = data_ingestion.ingest_data()
+        logging.info(f"Data ingestion completed.")
+        print(data_ingestion_artifact)
+        # # Data Transformation
+        # data_transformation_config = DataTransformationConfig(training_pipeline_config)
+        # data_transformation = DataTransformation(data_ingestion_artifact,data_transformation_config)
+        # logging.info("Initiating Data Transformation.")
+        # data_transformation_artifact = data_transformation.initiate_data_transformation()
+        # logging.info("Data Transformation Completed.")
+        # print(data_transformation_artifact)
+        # # Collaborative Model Training
+        # collaborative_model_trainer_config = CollaborativeModelConfig(training_pipeline_config)
+        # collaborative_model_trainer = CollaborativeModelTrainer(collaborative_model_trainer_config= collaborative_model_trainer_config,data_transformation_artifact=data_transformation_artifact)
+        # logging.info("Initiating Collaborative Model training.")
+        # collaborative_model_trainer_artifact = collaborative_model_trainer.initiate_model_trainer(model_type='svd')
+        # logging.info("Collaborative Model training completed.")
+        # print(collaborative_model_trainer_artifact)
+        # # Content Based Model Training
+        # content_based_model_trainer_config = ContentBasedModelConfig(training_pipeline_config)
+        # content_based_model_trainer = ContentBasedModelTrainer(content_based_model_trainer_config=content_based_model_trainer_config,data_ingestion_artifact=data_ingestion_artifact)
+        # logging.info("Initiating Content Based Model training.")
+        # content_based_model_trainer_artifact = content_based_model_trainer.initiate_model_trainer()
+        # logging.info("Content Based Model training completed.")
+        # print(content_based_model_trainer_artifact)
+        # # Popularity Based Filtering
+        # logging.info("Initiating Popularity based filtering.")
+        # filtering = PopularityBasedRecommendor(data_ingestion_artifact=data_ingestion_artifact)
+        # popularity_recommendations =  filtering.initiate_model_trainer(filter_type='top_avg_rated')
+        # logging.info("Popularity based filtering completed.")
+    except Exception as e:
+            raise AnimeRecommendorException(e, sys)