import sys

import numpy as np
import pandas as pd

from anime_recommender.loggers.logging import logging
from anime_recommender.exception.exception import AnimeRecommendorException
from anime_recommender.utils.main_utils.utils import export_data_to_dataframe
from anime_recommender.constant import *
from anime_recommender.entity.config_entity import DataTransformationConfig
from anime_recommender.entity.artifact_entity import DataIngestionArtifact, DataTransformationArtifact


class DataTransformation:
    """
    Data transformation step for the anime recommender pipeline.

    Reads the ingested anime and user-rating CSV files, merges them on
    'anime_id', cleans and filters the merged rows, and exports the result
    to the path given by the transformation configuration.
    """

    def __init__(
        self,
        data_ingestion_artifact: DataIngestionArtifact,
        data_transformation_config: DataTransformationConfig,
    ):
        """
        Stores the ingestion artifact and transformation configuration.

        Args:
            data_ingestion_artifact (DataIngestionArtifact): Artifact whose
                `feature_store_anime_file_path` and
                `feature_store_userrating_file_path` attributes are read by
                `initiate_data_transformation`.
            data_transformation_config (DataTransformationConfig): Configuration
                whose `merged_file_path` attribute is used as the export target.
        """
        # Plain attribute assignment cannot fail, so no exception wrapping is needed.
        self.data_ingestion_artifact = data_ingestion_artifact
        self.data_transformation_config = data_transformation_config

    @staticmethod
    def read_data(file_path: str) -> pd.DataFrame:
        """
        Reads data from a CSV file.

        Args:
            file_path (str): Path to the CSV file.

        Returns:
            pd.DataFrame: The DataFrame containing the data from the CSV file.

        Raises:
            AnimeRecommendorException: If `pd.read_csv` fails for any reason.
        """
        try:
            return pd.read_csv(file_path)
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    @staticmethod
    def merge_data(anime_df: pd.DataFrame, rating_df: pd.DataFrame) -> pd.DataFrame:
        """
        Inner-joins the rating and anime DataFrames on 'anime_id'.

        The rating DataFrame is the left operand, so its columns come first
        in the merged result.

        Args:
            anime_df (pd.DataFrame): DataFrame containing anime information.
            rating_df (pd.DataFrame): DataFrame containing user rating information.

        Returns:
            pd.DataFrame: Merged DataFrame on 'anime_id'.

        Raises:
            AnimeRecommendorException: If the merge fails (for example when
                'anime_id' is missing from either input).
        """
        try:
            merged_df = pd.merge(rating_df, anime_df, on="anime_id", how="inner")
            logging.info(f"Shape of the Merged dataframe:{merged_df.shape}")
            logging.info(f"Column names: {merged_df.columns}")
            return merged_df
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    @staticmethod
    def clean_filter_data(merged_df: pd.DataFrame, *, min_rating: float = 6) -> pd.DataFrame:
        """
        Cleans the merged DataFrame and keeps only well-rated rows.

        Steps:
            1. Coerce 'average_rating' to numeric; non-numeric placeholders
               such as 'UNKNOWN' become NaN.
            2. Fill NaN ratings with the median of the known ratings.
            3. Keep rows whose rating is strictly greater than `min_rating`.
            4. Drop the columns that are not carried forward.

        The input DataFrame is not modified.

        Args:
            merged_df (pd.DataFrame): Merged DataFrame to clean and filter.
            min_rating (float): Exclusive lower bound on 'average_rating'.
                Defaults to 6.

        Returns:
            pd.DataFrame: A new, cleaned and filtered DataFrame.

        Raises:
            AnimeRecommendorException: If 'average_rating' or any of the
                columns to drop is missing, or any pandas operation fails.
        """
        try:
            # errors='coerce' already maps 'UNKNOWN' (and any other non-numeric
            # text) to NaN, so no separate replace step is required.
            ratings = pd.to_numeric(merged_df['average_rating'], errors='coerce')

            # fillna returns a new Series; the result must be kept, otherwise
            # the imputation is lost and the NaN rows get filtered out below.
            # If every rating is NaN the median is NaN and this is a no-op.
            ratings = ratings.fillna(ratings.median())

            keep_mask = ratings > min_rating

            cols_to_drop = [
                'username',
                'overview',
                'type',
                'episodes',
                'producers',
                'licensors',
                'studios',
                'source',
                'rank',
                'popularity',
                'favorites',
                'scored by',
                'members',
            ]

            # Copy only the surviving rows so the caller's frame stays untouched.
            cleaned_df = merged_df.loc[keep_mask].copy()
            # Assign by position to avoid index alignment on non-unique indexes.
            cleaned_df['average_rating'] = ratings.loc[keep_mask].to_numpy()
            cleaned_df.drop(columns=cols_to_drop, inplace=True)

            logging.info(f"Shape of the Merged dataframe:{cleaned_df.shape}")
            logging.info(f"Column names: {cleaned_df.columns}")
            logging.info(f"Preview of the merged DataFrame:\n{cleaned_df.head()}")
            return cleaned_df
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    def initiate_data_transformation(self) -> DataTransformationArtifact:
        """
        Runs the full transformation: read, merge, clean/filter, export.

        The anime and user-rating CSV paths are taken from the data ingestion
        artifact. The cleaned DataFrame is handed to `export_data_to_dataframe`
        together with the configured `merged_file_path` (presumably that helper
        writes the frame to that path; confirm against its implementation).

        Returns:
            DataTransformationArtifact: Artifact whose `merged_file_path` is
            the configured export path.

        Raises:
            AnimeRecommendorException: If any step of the process fails.
        """
        logging.info("Entering initiate_data_transformation method of DataTransformation class.")
        try:
            anime_df = DataTransformation.read_data(
                self.data_ingestion_artifact.feature_store_anime_file_path
            )
            rating_df = DataTransformation.read_data(
                self.data_ingestion_artifact.feature_store_userrating_file_path
            )
            merged_df = DataTransformation.merge_data(anime_df, rating_df)
            transformed_df = DataTransformation.clean_filter_data(merged_df)
            export_data_to_dataframe(transformed_df, self.data_transformation_config.merged_file_path)
            data_transformation_artifact = DataTransformationArtifact(
                merged_file_path=self.data_transformation_config.merged_file_path
            )
            return data_transformation_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e