Edwin Salguero
Initial commit: Enhanced Algorithmic Trading System with Synthetic Data Generation, Comprehensive Logging, and Extensive Testing
859af74
import pytest | |
import pandas as pd | |
import numpy as np | |
from datetime import datetime | |
import tempfile | |
import os | |
from agentic_ai_system.synthetic_data_generator import SyntheticDataGenerator | |
class TestSyntheticDataGenerator:
    """Test cases for SyntheticDataGenerator.

    The ``config`` and ``generator`` fixtures below supply every test with a
    sample configuration and a generator instance built from it.
    """

    @pytest.fixture
    def config(self):
        """Return a sample configuration dict for testing."""
        return {
            'synthetic_data': {
                'base_price': 100.0,
                'volatility': 0.02,
                'trend': 0.001,
                'noise_level': 0.005,
            },
            'trading': {
                'symbol': 'AAPL',
                'timeframe': '1min',
            },
        }

    @pytest.fixture
    def generator(self, config):
        """Create a SyntheticDataGenerator instance from the sample config."""
        return SyntheticDataGenerator(config)

    def test_initialization(self, generator, config):
        """The generator exposes the 'synthetic_data' settings as attributes."""
        settings = config['synthetic_data']
        assert generator.base_price == settings['base_price']
        assert generator.volatility == settings['volatility']
        assert generator.trend == settings['trend']
        assert generator.noise_level == settings['noise_level']

    def test_generate_ohlcv_data(self, generator):
        """OHLCV generation returns a well-formed, internally consistent frame."""
        df = generator.generate_ohlcv_data(
            symbol='AAPL',
            start_date='2024-01-01',
            end_date='2024-01-02',
            frequency='1min',
        )

        # Check DataFrame structure
        assert isinstance(df, pd.DataFrame)
        assert len(df) > 0

        # Check required columns (report every missing one at once)
        required_columns = ['timestamp', 'symbol', 'open', 'high', 'low', 'close', 'volume']
        missing = [col for col in required_columns if col not in df.columns]
        assert not missing, f"missing columns: {missing}"

        # Check data types
        assert df['timestamp'].dtype == 'datetime64[ns]'
        assert df['symbol'].dtype == 'object'
        for price_col in ('open', 'high', 'low', 'close'):
            assert df[price_col].dtype in ['float64', 'float32']
        assert df['volume'].dtype in ['int64', 'int32']

        # Check data validity: high/low must bracket open and close
        assert (df['high'] >= df['low']).all()
        assert (df['high'] >= df['open']).all()
        assert (df['high'] >= df['close']).all()
        assert (df['low'] <= df['open']).all()
        assert (df['low'] <= df['close']).all()
        assert (df['volume'] >= 0).all()
        assert (df['open'] > 0).all()
        assert (df['close'] > 0).all()

    def test_generate_tick_data(self, generator):
        """Tick generation returns a non-empty frame with valid prices/volumes."""
        df = generator.generate_tick_data(
            symbol='AAPL',
            duration_minutes=10,
            tick_interval_ms=1000,
        )

        # Check DataFrame structure
        assert isinstance(df, pd.DataFrame)
        assert len(df) > 0

        # Check required columns (report every missing one at once)
        required_columns = ['timestamp', 'symbol', 'price', 'volume']
        missing = [col for col in required_columns if col not in df.columns]
        assert not missing, f"missing columns: {missing}"

        # Check data validity
        assert (df['price'] > 0).all()
        assert (df['volume'] >= 0).all()
        assert df['symbol'].iloc[0] == 'AAPL'

    def test_generate_price_series(self, generator):
        """The private price-series helper returns `length` positive prices."""
        length = 100
        prices = generator._generate_price_series(length)

        assert isinstance(prices, np.ndarray)
        assert len(prices) == length
        assert (prices > 0).all()  # All prices should be positive

    def test_save_to_csv(self, generator):
        """Saved CSV exists, is non-empty, and round-trips rows and columns."""
        df = generator.generate_ohlcv_data(
            symbol='AAPL',
            start_date='2024-01-01',
            end_date='2024-01-01',
            frequency='1H',
        )

        # delete=False: only the path is needed; the handle is closed before
        # save_to_csv writes to it, and the file is removed in `finally`.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as tmp_file:
            filepath = tmp_file.name

        try:
            generator.save_to_csv(df, filepath)

            # Check if file exists and has content
            assert os.path.exists(filepath)
            assert os.path.getsize(filepath) > 0

            # Load and verify data
            loaded_df = pd.read_csv(filepath)
            assert len(loaded_df) == len(df)
            assert list(loaded_df.columns) == list(df.columns)
        finally:
            # Cleanup
            if os.path.exists(filepath):
                os.unlink(filepath)

    def test_market_scenarios(self, generator):
        """Each supported scenario yields data; 'crash' averages below base."""
        scenarios = ['normal', 'volatile', 'trending', 'crash']

        for scenario in scenarios:
            df = generator.generate_market_scenarios(scenario)

            assert isinstance(df, pd.DataFrame)
            assert len(df) > 0

            # Check that crash scenario has lower prices on average
            if scenario == 'crash':
                avg_price = df['close'].mean()
                # Should be significantly lower
                assert avg_price < generator.base_price * 0.9

    def test_invalid_frequency(self, generator):
        """An unsupported frequency string raises ValueError."""
        with pytest.raises(ValueError, match="Unsupported frequency"):
            generator.generate_ohlcv_data(frequency='invalid')

    def test_invalid_scenario(self, generator):
        """An unknown scenario name raises ValueError."""
        with pytest.raises(ValueError, match="Unknown scenario type"):
            generator.generate_market_scenarios('invalid_scenario')

    def test_empty_date_range(self, generator):
        """A start date equal to the end date still yields data."""
        df = generator.generate_ohlcv_data(
            start_date='2024-01-01',
            end_date='2024-01-01',
            frequency='1D',
        )

        # Should generate at least one data point
        assert len(df) >= 1

    def test_different_symbols(self, generator):
        """The requested symbol is echoed in the generated frame."""
        symbols = ['AAPL', 'GOOGL', 'MSFT', 'TSLA']

        for symbol in symbols:
            df = generator.generate_ohlcv_data(symbol=symbol)
            assert df['symbol'].iloc[0] == symbol

    def test_price_consistency(self, generator):
        """Generated prices stay within 50% of the configured base price."""
        df = generator.generate_ohlcv_data(
            start_date='2024-01-01',
            end_date='2024-01-02',
            frequency='1H',
        )

        # Check that prices are within reasonable bounds
        price_columns = df[['open', 'high', 'low', 'close']]
        max_price = price_columns.max().max()
        min_price = price_columns.min().min()

        # Prices should be within 50% of base price
        assert min_price > generator.base_price * 0.5
        assert max_price < generator.base_price * 1.5

    def test_volume_correlation(self, generator):
        """Volume/price-movement correlation is computable (not NaN)."""
        df = generator.generate_ohlcv_data(
            start_date='2024-01-01',
            end_date='2024-01-02',
            frequency='1H',
        )

        # Calculate price movement
        df['price_movement'] = abs(df['close'] - df['open'])

        # Only checks the correlation is defined; a NaN would indicate a
        # constant (zero-variance) volume or price-movement column.
        correlation = df['volume'].corr(df['price_movement'])
        assert not np.isnan(correlation)  # Should have some correlation