#!/usr/bin/env python3
"""
Tick Data Converter for Backtesting
Converts high-frequency tick data to OHLC bars for backtesting
"""

import os
import sys
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional
import json

class TickDataConverter:
    """Convert bid/ask tick CSV files into OHLC bars for backtesting.

    The input CSV is expected to have a header row followed by five columns:
    timestamp, bid price, ask price, bid volume and ask volume. Bars are built
    from the bid/ask mid price; bar volume is bid volume plus ask volume.
    """

    def __init__(self):
        # Maps the timeframe names accepted by this class to the pandas
        # frequency aliases handed to ``resample``.
        self.supported_timeframes = {
            '1min': '1min',
            '5min': '5min',
            '15min': '15min',
            '30min': '30min',
            '1hour': '1h',
            '4hour': '4h',
            '1day': '1D',
            '1w': '1W',
            '1m': '1M',
        }

    @staticmethod
    def _collect_intrabar_ticks(ticks: pd.DataFrame, bar_index: pd.Index, freq: str):
        """Return, for each label in *bar_index*, the list of ticks in that bar.

        Ticks are grouped with the same resampler that produced the bars, so
        each tick lands in exactly the bar it contributed to, regardless of
        how the frequency labels or closes its bins.
        """
        ticks_by_bar = {
            label: group.to_dict('records')
            for label, group in ticks[['price', 'volume']].resample(freq)
        }

        # An object array keeps every cell a plain Python list instead of
        # letting pandas try to expand the lists into columns.
        cells = np.empty(len(bar_index), dtype=object)
        for position, label in enumerate(bar_index):
            cells[position] = ticks_by_bar.get(label, [])
        return cells

    def convert_tick_to_ohlc(self, csv_file: str, timeframe: str = '1hour',
                           symbol: Optional[str] = None, market: str = 'forex',
                           include_intrabar_ticks: bool = False) -> pd.DataFrame:
        """Convert a tick data CSV into OHLC bars.

        Args:
            csv_file: Path of the tick CSV (header row plus timestamp, bid
                price, ask price, bid volume, ask volume columns; timestamps
                formatted as ``%Y%m%d %H:%M:%S:%f``).
            timeframe: Key of ``self.supported_timeframes``. An unknown key
                falls back to hourly bars and prints a warning.
            symbol: Instrument name to store in the output. When empty it is
                derived from the file name.
            market: ``'crypto'`` yields a leading ``symbol`` column, ``'forex'``
                a leading ``pair`` column; any other value leaves the columns
                in construction order.
            include_intrabar_ticks: When true, add an ``intrabar_ticks`` column
                holding, per bar, a list of ``{'price', 'volume'}`` dicts.

        Returns:
            DataFrame indexed by bar timestamp with open/high/low/close/volume
            columns plus the instrument column; bars without ticks are dropped.
        """

        print(f"🔄 Converting {csv_file} to {timeframe} OHLC bars...")

        # Read CSV, replacing the file's own header with fixed column names
        df = pd.read_csv(csv_file, header=0, names=['timestamp', 'bid_price', 'ask_price', 'bid_volume', 'ask_volume'])

        # Convert timestamp to datetime
        df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y%m%d %H:%M:%S:%f')

        # Calculate mid price (average of bid/ask)
        df['price'] = (df['bid_price'] + df['ask_price']) / 2

        # Calculate volume (combine bid and ask volumes)
        df['volume'] = df['bid_volume'] + df['ask_volume']

        # Set timestamp as index
        df.set_index('timestamp', inplace=True)

        # Resolve the pandas frequency; unknown names fall back to hourly bars
        timeframe_str = self.supported_timeframes.get(timeframe)
        if timeframe_str is None:
            timeframe_str = self.supported_timeframes['1hour']
            print(f"⚠️ Unsupported timeframe '{timeframe}', falling back to 1hour bars")

        # Resample to OHLC bars
        ohlc = df['price'].resample(timeframe_str).ohlc()

        # Volume aggregation
        volume_agg = df['volume'].resample(timeframe_str).sum()

        # Combine OHLC and volume
        result = pd.concat([ohlc, volume_agg], axis=1)

        # Periods without ticks have NaN prices; drop them
        result = result.dropna()

        # Add symbol column
        if symbol:
            result['symbol'] = symbol
        else:
            # Extract from filename
            filename = os.path.basename(csv_file).upper()
            if 'BTCUSD' in filename:
                result['symbol'] = 'BTC'
            elif 'XAUUSD' in filename:
                result['symbol'] = 'XAUUSD'
            else:
                result['symbol'] = filename.replace('.CSV', '')

        # Optionally include intrabar tick data for advanced strategies
        if include_intrabar_ticks:
            result['intrabar_ticks'] = self._collect_intrabar_ticks(
                df, result.index, timeframe_str
            )

        # Reorder columns for consistency
        if market == 'crypto':
            if include_intrabar_ticks:
                result = result[['symbol', 'open', 'high', 'low', 'close', 'volume', 'intrabar_ticks']]
            else:
                result = result[['symbol', 'open', 'high', 'low', 'close', 'volume']]
        elif market == 'forex':
            result['pair'] = result['symbol']
            if include_intrabar_ticks:
                result = result[['pair', 'open', 'high', 'low', 'close', 'volume', 'intrabar_ticks']]
            else:
                result = result[['pair', 'open', 'high', 'low', 'close', 'volume']]

        print(f"✅ Converted {len(df)} tick records to {len(result)} {timeframe} bars")
        if include_intrabar_ticks:
            print("   📊 Including intrabar tick data for advanced strategies")
        print(f"   Date range: {result.index.min()} to {result.index.max()}")

        return result

    def save_ohlc_csv(self, df: pd.DataFrame, output_file: str, market: str = 'forex'):
        """Write OHLC bars to *output_file* as CSV and return that path.

        The index is written as a ``date`` column when it is named
        ``timestamp``. For ``'crypto'`` a constant ``news_sentiment`` column is
        added; for ``'forex'`` constant ``spread`` and ``news_impact`` columns
        are added. Any other market writes the frame's columns unchanged.
        Missing parent directories of *output_file* are created.
        """

        # Reset index and rename to date for consistency with strategies
        df_reset = df.reset_index()
        df_reset = df_reset.rename(columns={'timestamp': 'date'})

        # Format date properly
        if 'date' in df_reset.columns:
            df_reset['date'] = pd.to_datetime(df_reset['date']).dt.strftime('%Y-%m-%d %H:%M:%S')

        if market == 'crypto':
            # Accept frames that carry the instrument under the forex name
            if 'symbol' not in df_reset.columns and 'pair' in df_reset.columns:
                df_reset['symbol'] = df_reset['pair']
            # Add news_sentiment column (neutral by default)
            df_reset['news_sentiment'] = 'neutral'
            columns = ['date', 'symbol', 'open', 'close', 'high', 'low', 'volume', 'news_sentiment']
        elif market == 'forex':
            # Accept frames that carry the instrument under the crypto name
            if 'pair' not in df_reset.columns and 'symbol' in df_reset.columns:
                df_reset['pair'] = df_reset['symbol']
            # Add spread and news_impact columns
            df_reset['spread'] = 1.0  # Default spread
            df_reset['news_impact'] = 'low'
            columns = ['date', 'pair', 'open', 'close', 'high', 'low', 'volume', 'spread', 'news_impact']
        else:
            columns = df_reset.columns.tolist()

        # to_csv does not create directories, so make sure the target exists
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save to CSV
        df_reset[columns].to_csv(output_file, index=False)
        print(f"💾 Saved {len(df_reset)} bars to {output_file}")

        return output_file

    def process_file(self, input_file: str, timeframe: str = '1hour',
                    output_file: Optional[str] = None, market: str = 'auto'):
        """Convert one tick data file and save the resulting bars.

        Args:
            input_file: Path of the tick CSV to convert.
            timeframe: Timeframe name passed to ``convert_tick_to_ohlc``.
            output_file: Destination CSV. Defaults to
                ``data/csv/<input base name>_<timeframe>_ohlc.csv``.
            market: ``'crypto'``, ``'forex'`` or ``'auto'`` to guess from the
                file name (names containing BTC, ETH, ADA, SOL or DOT are
                treated as crypto, everything else as forex).

        Returns:
            Tuple of (output file path, number of bars written).

        Raises:
            FileNotFoundError: If *input_file* does not exist.
        """

        if not os.path.exists(input_file):
            raise FileNotFoundError(f"Input file not found: {input_file}")

        # Auto-detect market from filename
        filename = os.path.basename(input_file).upper()
        if market == 'auto':
            if 'BTC' in filename or any(crypto in filename for crypto in ['ETH', 'ADA', 'SOL', 'DOT']):
                market = 'crypto'
            else:
                market = 'forex'

        # Extract symbol; None lets the converter derive it from the file name
        symbol = None
        if 'BTCUSD' in filename:
            symbol = 'BTC' if market == 'crypto' else 'BTCUSD'
        elif 'XAUUSD' in filename:
            symbol = 'XAUUSD'

        # Convert to OHLC
        ohlc_df = self.convert_tick_to_ohlc(input_file, timeframe, symbol, market)

        # Generate output filename if not provided
        if output_file is None:
            base_name = os.path.splitext(os.path.basename(input_file))[0]
            output_file = f"data/csv/{base_name}_{timeframe}_ohlc.csv"

        # Save to CSV
        self.save_ohlc_csv(ohlc_df, output_file, market)

        return output_file, len(ohlc_df)

def batch_convert_data_csv():
    """Convert every tick-data CSV in data/csv/ into 1hour, 4hour and 1day bars.

    Files whose path contains ``_ohlc.csv`` are treated as already converted
    and skipped. A remaining file counts as tick data when its first line
    mentions one of the known tick header terms. Progress and a final summary
    are printed; nothing is returned.
    """
    import glob
    import os

    csv_dir = "data/csv"
    if not os.path.exists(csv_dir):
        print(f"❌ Directory {csv_dir} not found")
        return

    header_markers = ['timestamp', 'bid price', 'ask price', 'bid volume']
    crypto_markers = ['BTC', 'ETH', 'ADA', 'SOL', 'DOT']

    def looks_like_tick_file(path):
        # Peek at the header line only; unreadable files are simply ignored
        try:
            with open(path, 'r') as handle:
                header = handle.readline().strip().lower()
        except Exception:
            return False
        return any(marker in header for marker in header_markers)

    tick_files = [
        path
        for path in glob.glob(f"{csv_dir}/*.csv")
        if '_ohlc.csv' not in path and looks_like_tick_file(path)
    ]

    if not tick_files:
        print("✅ No new tick data files to convert")
        return

    print(f"🔄 Found {len(tick_files)} tick data files to convert:")
    for path in tick_files:
        print(f"  • {os.path.basename(path)}")

    converter = TickDataConverter()

    # Each tick file is rendered at these common timeframes
    timeframes_to_create = ['1hour', '4hour', '1day']

    converted_files = []
    for input_file in tick_files:
        short_name = os.path.basename(input_file)
        upper_name = short_name.upper()

        # Crypto tickers in the file name select the crypto layout
        is_crypto = any(marker in upper_name for marker in crypto_markers)
        market = 'crypto' if is_crypto else 'forex'

        print(f"\n🔄 Processing {short_name} ({market})")

        for timeframe in timeframes_to_create:
            try:
                # output_file=None lets process_file pick the output name
                output_file, num_bars = converter.process_file(
                    input_file, timeframe=timeframe, output_file=None, market=market
                )
                converted_files.append((output_file, num_bars, timeframe))
                print(f"  ✅ {timeframe}: {num_bars} bars")
            except Exception as e:
                print(f"  ❌ {timeframe}: {e}")

    print("\n🎯 Batch Conversion Complete!")
    print(f"📊 Total conversions: {len(converted_files)}")
    print("\n📁 Generated files:")
    for written_path, bar_count, tf_name in converted_files:
        print(f"  • {os.path.basename(written_path)}: {bar_count} bars ({tf_name})")

def main():
    """Command-line entry point: convert one tick file, or batch convert data/csv/.

    Batch mode runs when ``--batch`` is given or no input file is supplied.
    Any exception raised during conversion is printed and the process exits
    with status 1.
    """
    import argparse

    timeframe_choices = ['1min', '5min', '15min', '30min', '1hour', '4hour', '1day', '1w', '1m']
    market_choices = ['forex', 'crypto', 'auto']

    cli = argparse.ArgumentParser(description='Convert tick data to OHLC bars')
    cli.add_argument('input_file', nargs='?', help='Input tick data CSV file (omit to batch convert all)')
    cli.add_argument('--timeframe', '-t', default='1hour',
                     choices=timeframe_choices,
                     help='Timeframe for OHLC bars (default: 1hour)')
    cli.add_argument('--output', '-o', help='Output CSV file path')
    cli.add_argument('--market', '-m', choices=market_choices,
                     default='auto', help='Market type (auto-detect from filename)')
    cli.add_argument('--batch', '-b', action='store_true',
                     help='Batch convert all CSV files in data/csv/ directory')

    opts = cli.parse_args()
    run_batch = opts.batch or not opts.input_file

    try:
        if run_batch:
            # No specific file requested: process the whole data directory
            batch_convert_data_csv()
            return

        # Single-file conversion
        output_file, num_bars = TickDataConverter().process_file(
            opts.input_file,
            opts.timeframe,
            opts.output,
            opts.market,
        )

        print("\n🎯 Conversion Complete!")
        print(f"📁 Output: {output_file}")
        print(f"📊 Bars: {num_bars}")
        print(f"⏰ Timeframe: {opts.timeframe}")

    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
