krishnaveni76 committed on
Commit
660bb11
·
1 Parent(s): a95ae8b

Data ingestion completed

Browse files
Files changed (28) hide show
  1. .gitignore +3 -1
  2. anime_recommender/__pycache__/__init__.cpython-310.pyc +0 -0
  3. anime_recommender/constant/__init__.py +43 -0
  4. anime_recommender/constant/__pycache__/__init__.cpython-310.pyc +0 -0
  5. anime_recommender/{constants → entity}/__init__.py +0 -0
  6. anime_recommender/entity/__pycache__/__init__.cpython-310.pyc +0 -0
  7. anime_recommender/entity/__pycache__/artifact_entity.cpython-310.pyc +0 -0
  8. anime_recommender/entity/__pycache__/config_entity.cpython-310.pyc +0 -0
  9. anime_recommender/entity/artifact_entity.py +7 -0
  10. anime_recommender/entity/config_entity.py +31 -0
  11. anime_recommender/exception/__init__.py +0 -0
  12. anime_recommender/exception/__pycache__/__init__.cpython-310.pyc +0 -0
  13. anime_recommender/exception/__pycache__/exception.cpython-310.pyc +0 -0
  14. anime_recommender/exception/exception.py +44 -0
  15. anime_recommender/loggers/__init__.py +0 -0
  16. anime_recommender/loggers/__pycache__/__init__.cpython-310.pyc +0 -0
  17. anime_recommender/loggers/__pycache__/logging.cpython-310.pyc +0 -0
  18. anime_recommender/loggers/logging.py +16 -0
  19. anime_recommender/source/__pycache__/__init__.cpython-310.pyc +0 -0
  20. anime_recommender/source/__pycache__/data_ingestion.cpython-310.pyc +0 -0
  21. anime_recommender/source/data_ingestion.py +58 -0
  22. anime_recommender/utils/__init__.py +0 -0
  23. anime_recommender/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  24. anime_recommender/utils/main_utils/__init__.py +0 -0
  25. anime_recommender/utils/main_utils/__pycache__/__init__.cpython-310.pyc +0 -0
  26. anime_recommender/utils/main_utils/__pycache__/utils.cpython-310.pyc +0 -0
  27. anime_recommender/utils/main_utils/utils.py +47 -0
  28. run_pipeline.py +53 -0
.gitignore CHANGED
@@ -1,2 +1,4 @@
1
  ars/
2
- .env
 
 
 
1
  ars/
2
+ .env
3
+ Artifacts/
4
+ logs/
anime_recommender/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (218 Bytes). View file
 
anime_recommender/constant/__init__.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# ---------------------------------------------------------------------------
# Common constants shared across the training pipeline
# ---------------------------------------------------------------------------
PIPELINE_NAME: str = "AnimeRecommendor"
ARTIFACT_DIR: str = "Artifacts"
ANIME_FILE_NAME: str = "Animes.csv"
RATING_FILE_NAME: str = "UserRatings.csv"
MERGED_FILE_NAME: str = "Anime_UserRatings.csv"
ZIP_FILE_PATH: str = 'datasets/archive.zip'
DATASETS_FILE_PATH: str = "datasets"

# Dataset identifiers in "<owner>/<name>" form
ANIME_FILE_PATH: str = "krishnaveni76/Animes"
RATING_FILE_PATH: str = "krishnaveni76/UserRatings"
ANIMEUSERRATINGS_FILE_PATH: str = "krishnaveni76/Anime_UserRatings"

# ---------------------------------------------------------------------------
# Data ingestion constants (names start with DATA_INGESTION)
# ---------------------------------------------------------------------------
DATA_INGESTION_DIR_NAME: str = "data_ingestion"
DATA_INGESTION_FEATURE_STORE_DIR: str = "feature_store"
DATA_INGESTION_INGESTED_DIR: str = "ingested"

# ---------------------------------------------------------------------------
# Data transformation constants (names start with DATA_TRANSFORMATION)
# ---------------------------------------------------------------------------
DATA_TRANSFORMATION_DIR: str = "data_transformation"
DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR: str = "transformed"

# ---------------------------------------------------------------------------
# Model trainer constants (names start with MODEL_TRAINER)
# ---------------------------------------------------------------------------
MODEL_TRAINER_DIR_NAME: str = "trained_models"

# Collaborative-filtering models
MODEL_TRAINER_COL_TRAINED_MODEL_DIR: str = "collaborative_recommenders"
MODEL_TRAINER_SVD_TRAINED_MODEL_NAME: str = "svd.pkl"
MODEL_TRAINER_ITEM_KNN_TRAINED_MODEL_NAME: str = "itembasedknn.pkl"
MODEL_TRAINER_USER_KNN_TRAINED_MODEL_NAME: str = "userbasedknn.pkl"

# Content-based models
MODEL_TRAINER_CON_TRAINED_MODEL_DIR: str = "content_based_recommenders"
MODEL_TRAINER_COSINESIMILARITY_MODEL_NAME: str = "cosine_similarity.pkl"

# Popularity-based models
MODEL_TRAINER_POP_TRAINED_MODEL_DIR: str = "popularity_based_recommenders"
anime_recommender/constant/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.8 kB). View file
 
anime_recommender/{constants → entity}/__init__.py RENAMED
File without changes
anime_recommender/entity/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (225 Bytes). View file
 
anime_recommender/entity/__pycache__/artifact_entity.cpython-310.pyc ADDED
Binary file (601 Bytes). View file
 
anime_recommender/entity/__pycache__/config_entity.cpython-310.pyc ADDED
Binary file (1.87 kB). View file
 
anime_recommender/entity/artifact_entity.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import Optional
3
+
4
@dataclass
class DataIngestionArtifact:
    """Record of the file paths handed over by the data ingestion step."""

    # Path of the anime CSV inside the feature store
    feature_store_anime_file_path: str
    # Path of the user-rating CSV inside the feature store
    feature_store_userrating_file_path: str
anime_recommender/entity/config_entity.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from datetime import datetime
3
+ from anime_recommender.constant import *
4
+
5
class TrainingPipelineConfig:
    """
    Configuration for the training pipeline, including artifact directory and timestamp.
    """

    def __init__(self, timestamp=None):
        """
        Initialize the configuration with a timestamped artifact directory.

        Args:
            timestamp (datetime | None): Moment used to name the artifact
                directory. When omitted, the current time is taken at call
                time. A ``datetime.now()`` default in the signature would be
                evaluated only once, when the class is defined, so every
                instance would share the same directory.

        Attributes:
            pipeline_name (str): Value of ``PIPELINE_NAME``.
            artifact_dir (str): ``ARTIFACT_DIR`` joined with the formatted timestamp.
            model_dir (str): Directory name ``"final_model"``.
            timestamp (str): Timestamp formatted as ``%m_%d_%Y_%H_%M_%S``.
        """
        if timestamp is None:
            timestamp = datetime.now()
        formatted_timestamp = timestamp.strftime("%m_%d_%Y_%H_%M_%S")
        self.pipeline_name = PIPELINE_NAME
        self.artifact_dir = os.path.join(ARTIFACT_DIR, formatted_timestamp)
        self.model_dir = os.path.join("final_model")
        self.timestamp: str = formatted_timestamp
18
+
19
class DataIngestionConfig:
    """
    Configuration for data ingestion: the step's working directory, the
    feature-store CSV paths and the source dataset identifiers.
    """

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        """
        Derive all data ingestion paths from the pipeline's artifact directory.

        Args:
            training_pipeline_config (TrainingPipelineConfig): Supplies
                ``artifact_dir``, under which the ingestion directory is placed.
        """
        ingestion_root = os.path.join(
            training_pipeline_config.artifact_dir, DATA_INGESTION_DIR_NAME
        )
        self.data_ingestion_dir: str = ingestion_root

        # Both feature-store files live in the same sub-directory.
        self.feature_store_anime_file_path: str = os.path.join(
            ingestion_root, DATA_INGESTION_FEATURE_STORE_DIR, ANIME_FILE_NAME
        )
        self.feature_store_userrating_file_path: str = os.path.join(
            ingestion_root, DATA_INGESTION_FEATURE_STORE_DIR, RATING_FILE_NAME
        )

        # Source dataset identifiers, taken from the constants module.
        self.anime_filepath: str = ANIME_FILE_PATH
        self.rating_filepath: str = RATING_FILE_PATH
anime_recommender/exception/__init__.py ADDED
File without changes
anime_recommender/exception/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (228 Bytes). View file
 
anime_recommender/exception/__pycache__/exception.cpython-310.pyc ADDED
Binary file (2.1 kB). View file
 
anime_recommender/exception/exception.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+
3
class AnimeRecommendorException(Exception):
    """
    Custom exception class for handling errors in the anime recommender project.

    It records the error message together with the file name and line number
    found in the traceback of the exception currently being handled.
    """

    def __init__(self, error_message, error_details: sys = sys):
        """
        Initialize the AnimeRecommendorException instance.

        Args:
            error_message (str | Exception): The error message or the original exception.
            error_details (sys): The sys module (or any object exposing
                ``exc_info()``), used to extract the active traceback.
                Defaults to the ``sys`` module.

        Attributes:
            error_message: Stores the original error message.
            lineno (int | None): Line number recorded in the active traceback,
                or None when no exception is being handled.
            file_name (str | None): File name of the traceback's frame,
                or None when no exception is being handled.
        """
        # Pass only the message on, so ``args`` does not hold the sys module.
        super().__init__(error_message)
        self.error_message = error_message
        _, _, exc_tb = error_details.exc_info()

        if exc_tb is None:
            # Constructed outside an ``except`` block: there is no traceback to inspect.
            self.lineno = None
            self.file_name = None
        else:
            self.lineno = exc_tb.tb_lineno
            self.file_name = exc_tb.tb_frame.f_code.co_filename

    def __str__(self):
        """
        Return the formatted error message.

        Returns:
            str: A string containing the file name, line number, and error
            message, or only the error message when no traceback information
            was available at construction time.
        """
        if self.file_name is None:
            return str(self.error_message)
        return "Error occured in python script name [{0}] line number [{1}] error message [{2}]".format(
            self.file_name, self.lineno, str(self.error_message))
38
+
39
if __name__ == "__main__":
    # Manual demonstration: trigger an error and re-raise it wrapped in the
    # project's exception type.
    try:
        quotient = 1 / 0  # This example will raise a ZeroDivisionError
        print("This will not be printed", quotient)
    except Exception as exc:
        raise AnimeRecommendorException(exc, sys)
anime_recommender/loggers/__init__.py ADDED
File without changes
anime_recommender/loggers/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (226 Bytes). View file
 
anime_recommender/loggers/__pycache__/logging.cpython-310.pyc ADDED
Binary file (630 Bytes). View file
 
anime_recommender/loggers/logging.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from datetime import datetime
4
+
5
# The log file is named after the moment this module is first imported.
LOGS_FILE = "{}.log".format(datetime.now().strftime('%m_%d_%Y_%H_%M_%S'))

# Logs are collected in a "logs" directory under the current working directory.
logs_dir = os.path.join(os.getcwd(), "logs")
os.makedirs(logs_dir, exist_ok=True)

LOGS_FILE_PATH = os.path.join(logs_dir, LOGS_FILE)

# Configure the root logger to write INFO-and-above records to that file.
logging.basicConfig(
    level=logging.INFO,
    format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
    filename=LOGS_FILE_PATH,
)
anime_recommender/source/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (225 Bytes). View file
 
anime_recommender/source/__pycache__/data_ingestion.cpython-310.pyc ADDED
Binary file (2.37 kB). View file
 
anime_recommender/source/data_ingestion.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import pandas as pd
4
+ from datasets import load_dataset
5
+ from anime_recommender.loggers.logging import logging
6
+ from anime_recommender.exception.exception import AnimeRecommendorException
7
+ from anime_recommender.entity.config_entity import DataIngestionConfig
8
+ from anime_recommender.entity.artifact_entity import DataIngestionArtifact
9
+ from anime_recommender.utils.main_utils.utils import export_data_to_dataframe
10
+
11
class DataIngestion:
    """Fetch the anime and user-rating datasets and store them as CSV files."""

    def __init__(self, data_ingestion_config: DataIngestionConfig):
        """
        Keep the ingestion configuration for later use.

        Args:
            data_ingestion_config (DataIngestionConfig): Source dataset
                identifiers and feature-store output paths.
        """
        # A plain attribute assignment cannot fail, so no try/except is needed.
        self.data_ingestion_config = data_ingestion_config

    def fetch_data_from_huggingface(self, dataset_path: str, split: str = None) -> pd.DataFrame:
        """
        Load a Hugging Face dataset and return it as a pandas DataFrame.

        Args:
            dataset_path (str): Dataset identifier passed to ``load_dataset``.
            split (str, optional): Split to load. When omitted, the "train"
                split of the loaded dataset is used.

        Returns:
            pd.DataFrame: The rows of the selected split.

        Raises:
            AnimeRecommendorException: If loading or conversion fails.
        """
        try:
            logging.info(f"Fetching data from Hugging Face dataset: {dataset_path}")
            # Load dataset from Hugging Face
            dataset = load_dataset(dataset_path, split=split)

            # Without a split, load_dataset returns a mapping of split names to
            # datasets, so take "train". With a split it already returns that
            # single dataset, and indexing it with 'train' would be wrong.
            records = dataset['train'] if split is None else dataset

            # Convert dataset to pandas DataFrame
            df = pd.DataFrame(records)

            # Log some information about the data
            logging.info(f"Shape of the dataframe: {df.shape}")
            logging.info(f"Column names: {df.columns}")
            logging.info(f"Preview of the DataFrame:\n{df.head()}")
            logging.info("Data fetched successfully from Hugging Face.")

            return df

        except Exception as e:
            logging.error(f"An error occurred while fetching data: {str(e)}")
            raise AnimeRecommendorException(e, sys) from e

    def ingest_data(self) -> DataIngestionArtifact:
        """
        Download both datasets and write them to the feature store.

        Returns:
            DataIngestionArtifact: Paths of the two CSV files that were written.

        Raises:
            AnimeRecommendorException: If fetching or saving fails.
        """
        try:
            config = self.data_ingestion_config

            # Load anime and rating data from Hugging Face datasets
            anime_df = self.fetch_data_from_huggingface(config.anime_filepath)
            rating_df = self.fetch_data_from_huggingface(config.rating_filepath)

            # Save both DataFrames as CSV files in the feature store
            export_data_to_dataframe(anime_df, file_path=config.feature_store_anime_file_path)
            export_data_to_dataframe(rating_df, file_path=config.feature_store_userrating_file_path)

            # Create artifact to store data ingestion info
            dataingestionartifact = DataIngestionArtifact(
                feature_store_anime_file_path=config.feature_store_anime_file_path,
                feature_store_userrating_file_path=config.feature_store_userrating_file_path
            )

            return dataingestionartifact

        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e
anime_recommender/utils/__init__.py ADDED
File without changes
anime_recommender/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (224 Bytes). View file
 
anime_recommender/utils/main_utils/__init__.py ADDED
File without changes
anime_recommender/utils/main_utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (235 Bytes). View file
 
anime_recommender/utils/main_utils/__pycache__/utils.cpython-310.pyc ADDED
Binary file (2.06 kB). View file
 
anime_recommender/utils/main_utils/utils.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import time
4
+ import pandas as pd
5
+ import joblib
6
+ from anime_recommender.loggers.logging import logging
7
+ from anime_recommender.exception.exception import AnimeRecommendorException
8
+ from anime_recommender.constant import *
9
+
10
def export_data_to_dataframe(dataframe: pd.DataFrame, file_path: str) -> pd.DataFrame:
    """
    Write a DataFrame to a CSV file, creating the parent directory if needed.

    Args:
        dataframe (pd.DataFrame): Data to save.
        file_path (str): Destination CSV path. A bare file name (no directory
            part) is written relative to the current working directory.

    Returns:
        pd.DataFrame: The same DataFrame that was passed in.

    Raises:
        AnimeRecommendorException: If the directory cannot be created or the
            file cannot be written.
    """
    try:
        logging.info(f"Saving DataFrame to file: {file_path}")
        dir_path = os.path.dirname(file_path)
        # os.makedirs("") raises FileNotFoundError, so skip it for bare file names.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        dataframe.to_csv(file_path, index=False, header=True)
        logging.info(f"DataFrame saved successfully to {file_path}.")
        return dataframe
    except Exception as e:
        raise AnimeRecommendorException(e, sys) from e
20
+
21
def load_csv_data(file_path: str) -> pd.DataFrame:
    """
    Read a CSV file into a pandas DataFrame.

    Args:
        file_path (str): Path of the CSV file to read.

    Returns:
        pd.DataFrame: The parsed file contents.

    Raises:
        AnimeRecommendorException: If the file cannot be read or parsed.
    """
    try:
        return pd.read_csv(file_path)
    except Exception as err:
        raise AnimeRecommendorException(err, sys) from err
27
+
28
def save_model(model: object, file_path: str) -> None:
    """
    Serialize a model object to disk with joblib.

    Args:
        model (object): Object to persist.
        file_path (str): Destination path. Its parent directory is created
            when missing; a bare file name is written relative to the
            current working directory.

    Raises:
        AnimeRecommendorException: If the directory cannot be created or the
            object cannot be written.
    """
    try:
        logging.info("Entered the save_model method of Main utils class")
        dir_path = os.path.dirname(file_path)
        # os.makedirs("") raises FileNotFoundError, so skip it for bare file names.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        with open(file_path, "wb") as file_obj:
            joblib.dump(model, file_obj)
        logging.info("Completed saving the model object.")
    except Exception as e:
        raise AnimeRecommendorException(e, sys) from e
37
+
38
def load_object(file_path: str) -> object:
    """
    Load an object previously saved with joblib.

    Args:
        file_path (str): Path of the serialized object.

    Returns:
        object: The deserialized object.

    Raises:
        AnimeRecommendorException: If the file does not exist or cannot be
            deserialized.
    """
    try:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"The file: {file_path} does not exist")
        with open(file_path, "rb") as file_obj:
            # NOTE(security): joblib.load unpickles the file, which can execute
            # arbitrary code. Only load files from trusted sources.
            return joblib.load(file_obj)
    except Exception as e:
        raise AnimeRecommendorException(e, sys) from e
47
+
run_pipeline.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from anime_recommender.loggers.logging import logging
3
+ from anime_recommender.exception.exception import AnimeRecommendorException
4
+ from anime_recommender.source.data_ingestion import DataIngestion
5
+ from anime_recommender.entity.config_entity import TrainingPipelineConfig,DataIngestionConfig
6
+ # ,DataTransformationConfig,CollaborativeModelConfig,ContentBasedModelConfig
7
+ # from anime_recommender.source.data_transformation import DataTransformation
8
+ # from anime_recommender.source.collaborative_recommenders import CollaborativeModelTrainer
9
+ # from anime_recommender.source.content_based_recommenders import ContentBasedModelTrainer
10
+ # from anime_recommender.source.popularity_based_recommenders import PopularityBasedRecommendor
11
+
12
if __name__ == "__main__":
    # Entry point: runs the data ingestion stage. The later stages are kept
    # below as commented-out code and are not executed.
    try:
        training_pipeline_config = TrainingPipelineConfig()

        # Data Ingestion
        data_ingestion = DataIngestion(DataIngestionConfig(training_pipeline_config))
        logging.info("Initiating Data Ingestion.")
        data_ingestion_artifact = data_ingestion.ingest_data()
        logging.info("Data ingestion completed.")
        print(data_ingestion_artifact)

        # # Data Transformation
        # data_transformation_config = DataTransformationConfig(training_pipeline_config)
        # data_transformation = DataTransformation(data_ingestion_artifact,data_transformation_config)
        # logging.info("Initiating Data Transformation.")
        # data_transformation_artifact = data_transformation.initiate_data_transformation()
        # logging.info("Data Transformation Completed.")
        # print(data_transformation_artifact)

        # # Collaborative Model Training
        # collaborative_model_trainer_config = CollaborativeModelConfig(training_pipeline_config)
        # collaborative_model_trainer = CollaborativeModelTrainer(collaborative_model_trainer_config= collaborative_model_trainer_config,data_transformation_artifact=data_transformation_artifact)
        # logging.info("Initiating Collaborative Model training.")
        # collaborative_model_trainer_artifact = collaborative_model_trainer.initiate_model_trainer(model_type='svd')
        # logging.info("Collaborative Model training completed.")
        # print(collaborative_model_trainer_artifact)

        # # Content Based Model Training
        # content_based_model_trainer_config = ContentBasedModelConfig(training_pipeline_config)
        # content_based_model_trainer = ContentBasedModelTrainer(content_based_model_trainer_config=content_based_model_trainer_config,data_ingestion_artifact=data_ingestion_artifact)
        # logging.info("Initiating Content Based Model training.")
        # content_based_model_trainer_artifact = content_based_model_trainer.initiate_model_trainer()
        # logging.info("Content Based Model training completed.")
        # print(content_based_model_trainer_artifact)

        # # Popularity Based Filtering
        # logging.info("Initiating Popularity based filtering.")
        # filtering = PopularityBasedRecommendor(data_ingestion_artifact=data_ingestion_artifact)
        # popularity_recommendations = filtering.initiate_model_trainer(filter_type='top_avg_rated')
        # logging.info("Popularity based filtering completed.")

    except Exception as exc:
        raise AnimeRecommendorException(exc, sys)