# Spaces: Sleeping (page-header residue captured with this file; not part of the module)
"""data_handler.py module: synthetic financial data generation and tf.data preprocessing."""
from typing import Dict, Optional, Tuple

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
class FinancialDataHandler:
    """Generate synthetic financial records and feed them to TensorFlow.

    The handler owns one ``StandardScaler`` instance. Every call to
    ``preprocess_data`` re-fits that scaler on the frame it is given, so the
    scaler always reflects the most recently preprocessed data.
    """

    def __init__(self, config: Dict):
        """Initialize the data handler with configuration.

        Args:
            config: Mapping with a ``'data'`` section that provides
                ``'batch_size'``, ``'shuffle_buffer'`` and
                ``'prefetch_buffer'``.

        Raises:
            KeyError: If the ``'data'`` section or one of its three keys is
                missing.
        """
        data_config = config['data']
        self.batch_size = data_config['batch_size']
        self.shuffle_buffer = data_config['shuffle_buffer']
        self.prefetch_buffer = data_config['prefetch_buffer']
        self.scaler = StandardScaler()

    def simulate_financial_data(
        self, num_samples: int = 1000, seed: Optional[int] = 42
    ) -> pd.DataFrame:
        """Generate synthetic financial data for testing.

        Values are drawn from a private ``np.random.RandomState`` instead of
        the process-wide NumPy generator, so calling this method does not
        re-seed ``np.random`` for the rest of the program. With the default
        seed the output matches what seeding the global legacy generator
        with 42 and drawing in the same order produces.

        Args:
            num_samples: Number of rows to generate.
            seed: Seed for the private generator. The default (42) makes the
                output reproducible; ``None`` lets NumPy pick a fresh seed,
                giving different data on each call.

        Returns:
            A DataFrame with ``num_samples`` rows and the columns
            ``transaction_amount``, ``account_balance``,
            ``transaction_frequency``, ``credit_score`` and
            ``days_since_last_transaction``.
        """
        rng = np.random.RandomState(seed)
        # The draw order below determines the generated values for a given
        # seed; reordering the columns would change the data.
        data = {
            'transaction_amount': rng.lognormal(mean=4.0, sigma=1.0, size=num_samples),
            'account_balance': rng.normal(loc=10000, scale=5000, size=num_samples),
            'transaction_frequency': rng.poisson(lam=5, size=num_samples),
            # Clip so simulated scores stay inside the 300-850 range.
            'credit_score': rng.normal(loc=700, scale=50, size=num_samples).clip(300, 850),
            'days_since_last_transaction': rng.exponential(scale=7, size=num_samples),
        }
        return pd.DataFrame(data)

    def preprocess_data(self, data: pd.DataFrame) -> tf.data.Dataset:
        """Preprocess the data and convert to TensorFlow dataset.

        Args:
            data: Frame of numeric feature columns.

        Returns:
            A dataset of standardized rows that is shuffled with a buffer of
            ``self.shuffle_buffer`` rows, grouped into batches of
            ``self.batch_size`` and prefetched with a buffer of
            ``self.prefetch_buffer``.
        """
        # fit_transform re-fits self.scaler on every call, replacing any
        # statistics learned from a previous frame.
        scaled_data = self.scaler.fit_transform(data)
        # NOTE(review): the scaled array is presumably float64; confirm
        # whether downstream models expect float32 before relying on it.
        dataset = tf.data.Dataset.from_tensor_slices(scaled_data)
        # Shuffle individual rows first so that batches mix rows.
        dataset = dataset.shuffle(self.shuffle_buffer)
        dataset = dataset.batch(self.batch_size)
        dataset = dataset.prefetch(self.prefetch_buffer)
        return dataset

    def get_client_data(self) -> Tuple[tf.data.Dataset, StandardScaler]:
        """Get preprocessed client data and scaler.

        Simulates data with the default sample count and seed, so every call
        starts from the same raw values.

        Returns:
            A ``(dataset, scaler)`` tuple: the preprocessed dataset and the
            handler's scaler, fitted on the simulated data.
        """
        raw_data = self.simulate_financial_data()
        dataset = self.preprocess_data(raw_data)
        return dataset, self.scaler