krishnaveni76's picture
Updated all files
b4f6ffc
import sys
import numpy as np
import pandas as pd
from anime_recommender.loggers.logging import logging
from anime_recommender.exception.exception import AnimeRecommendorException
from anime_recommender.utils.main_utils.utils import export_data_to_dataframe
from anime_recommender.constant import *
from anime_recommender.entity.config_entity import DataTransformationConfig
from anime_recommender.entity.artifact_entity import DataIngestionArtifact,DataTransformationArtifact
class DataTransformation:
    """
    Data transformation stage of the anime recommender pipeline.

    Reads the ingested anime and user-rating CSV files, merges them on
    'anime_id', cleans and filters the merged data, and exports the result
    to the path given by the transformation configuration.
    """

    def __init__(self, data_ingestion_artifact: DataIngestionArtifact, data_transformation_config: DataTransformationConfig):
        """
        Initializes the DataTransformation class with the given data ingestion and configuration artifacts.

        Args:
            data_ingestion_artifact (DataIngestionArtifact): The artifact containing ingested data paths.
            data_transformation_config (DataTransformationConfig): Configuration object for data transformation.

        Raises:
            AnimeRecommendorException: If storing the arguments fails.
        """
        try:
            self.data_ingestion_artifact = data_ingestion_artifact
            self.data_transformation_config = data_transformation_config
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    @staticmethod
    def read_data(file_path) -> pd.DataFrame:
        """
        Reads data from a CSV file.

        Args:
            file_path (str): Path to the CSV file.

        Returns:
            pd.DataFrame: The DataFrame containing the data from the CSV file.

        Raises:
            AnimeRecommendorException: If the file cannot be read or parsed.
        """
        try:
            return pd.read_csv(file_path)
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    @staticmethod
    def merge_data(anime_df: pd.DataFrame, rating_df: pd.DataFrame) -> pd.DataFrame:
        """
        Merges the anime and rating DataFrames on 'anime_id'.

        An inner join is used, so only ratings whose 'anime_id' exists in
        both frames are kept. Rating columns come first in the result.

        Args:
            anime_df (pd.DataFrame): DataFrame containing anime information.
            rating_df (pd.DataFrame): DataFrame containing user rating information.

        Returns:
            pd.DataFrame: Merged DataFrame on 'anime_id'.

        Raises:
            AnimeRecommendorException: If the merge fails (e.g. 'anime_id' is missing).
        """
        try:
            merged_df = pd.merge(rating_df, anime_df, on="anime_id", how="inner")
            logging.info(f"Shape of the Merged dataframe:{merged_df.shape}")
            logging.info(f"Column names: {merged_df.columns}")
            return merged_df
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    @staticmethod
    def clean_filter_data(merged_df: pd.DataFrame) -> pd.DataFrame:
        """
        Cleans and filters the merged DataFrame.

        Steps:
            1. Convert 'average_rating' to numeric; non-numeric entries such
               as 'UNKNOWN' become NaN.
            2. Fill NaN ratings with the median of the known ratings.
            3. Keep only rows whose 'average_rating' is greater than 6.
            4. Drop the columns that are not needed downstream.

        The input DataFrame is not modified.

        Args:
            merged_df (pd.DataFrame): Merged DataFrame to clean and filter.

        Returns:
            pd.DataFrame: Cleaned and Filtered DataFrame with NaN values handled.

        Raises:
            AnimeRecommendorException: If a required column is missing or the
                transformation fails.
        """
        try:
            # errors='coerce' already maps 'UNKNOWN' (and any other
            # non-numeric text) to NaN, so no separate replace step is needed.
            ratings = pd.to_numeric(merged_df['average_rating'], errors='coerce')
            # fillna returns a new Series; the result must be kept, otherwise
            # the NaN rows are silently discarded by the filter below.
            ratings = ratings.fillna(ratings.median())
            keep = ratings > 6

            cols_to_drop = ['username', 'overview', 'type', 'episodes', 'producers',
                            'licensors', 'studios', 'source', 'rank', 'popularity',
                            'favorites', 'scored by', 'members']

            # Filter and drop first so that only the smaller frame is copied,
            # then install the cleaned numeric ratings in the existing column.
            cleaned_df = (
                merged_df.loc[keep]
                .drop(columns=cols_to_drop)
                .assign(average_rating=ratings[keep])
            )

            logging.info(f"Shape of the Merged dataframe:{cleaned_df.shape}")
            logging.info(f"Column names: {cleaned_df.columns}")
            logging.info(f"Preview of the merged DataFrame:\n{cleaned_df.head()}")
            return cleaned_df
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e

    def initiate_data_transformation(self) -> DataTransformationArtifact:
        """
        Initiates the data transformation process by reading, transforming, and saving the data.

        Reads the anime and user-rating files referenced by the ingestion
        artifact, merges and cleans them, and exports the result to the
        configured merged file path.

        Returns:
            DataTransformationArtifact: The artifact containing paths to the transformed data.

        Raises:
            AnimeRecommendorException: If any step of the transformation fails.
        """
        logging.info("Entering initiate_data_transformation method of DataTransformation class.")
        try:
            anime_df = DataTransformation.read_data(self.data_ingestion_artifact.feature_store_anime_file_path)
            rating_df = DataTransformation.read_data(self.data_ingestion_artifact.feature_store_userrating_file_path)
            merged_df = DataTransformation.merge_data(anime_df, rating_df)
            transformed_df = DataTransformation.clean_filter_data(merged_df)
            export_data_to_dataframe(transformed_df, self.data_transformation_config.merged_file_path)
            data_transformation_artifact = DataTransformationArtifact(
                merged_file_path=self.data_transformation_config.merged_file_path
            )
            return data_transformation_artifact
        except Exception as e:
            raise AnimeRecommendorException(e, sys) from e