#!/usr/bin/env python3
"""
Deep Analysis — Complete profit/loss/margin study for optimization research.
Answers: Why we make money, why lots blow up, where to optimize.
"""
import csv, sys, os, json
from collections import defaultdict
from datetime import datetime, timedelta

# Run relative to this script's directory so the "results/" paths used below
# resolve no matter where the script is launched from. NOTE(review): this is a
# module-import side effect — it also changes CWD for any importer.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

def load_trades(filepath):
    """Load a trades CSV into a list of typed dicts.

    Expects columns: trade_id, side, lots, entry_price, exit_price,
    entry_time, exit_time, profit, and optionally magic_thread / comment
    (which default to '' when absent).

    Args:
        filepath: path to the trades CSV file.

    Returns:
        List of dicts with numeric fields converted to int/float.
    """
    trades = []
    # newline='' is required by the csv module so quoted fields containing
    # newlines are parsed correctly (see csv docs).
    with open(filepath, newline='') as f:
        for row in csv.DictReader(f):
            trades.append({
                'id': int(row['trade_id']),
                'side': row['side'],
                'lots': float(row['lots']),
                'entry_price': float(row['entry_price']),
                'exit_price': float(row['exit_price']),
                'entry_time': row['entry_time'],
                'exit_time': row['exit_time'],
                'profit': float(row['profit']),
                'thread': row.get('magic_thread', ''),
                'comment': row.get('comment', ''),
            })
    return trades

def get_level(comment):
    """Extract the recovery depth N from a comment tag like 'REC_L7_S2'.

    Returns 0 for entry trades (no 'REC_L' marker) and for malformed tags
    whose level token is not an integer.
    """
    if 'REC_L' in comment:
        # Token between 'REC_L' and the next '_' (or end of string).
        level_token = comment.split('REC_L')[1].split('_')[0]
        try:
            return int(level_token)
        except ValueError:
            # Malformed tag (e.g. 'REC_Lx') — treat as entry level.
            # Narrowed from a bare except: only int() can fail here.
            return 0
    return 0

def get_type(comment):
    """Classify a trade from its comment tag.

    Precedence: SUB_RPT beats REC_L; within REC_L, a '_S' suffix marks a
    sub-recovery trade. Anything untagged is a plain entry.
    """
    if 'SUB_RPT' in comment:
        return 'SUB_REPEAT'
    if 'REC_L' in comment:
        return 'RECOVERY_SUB' if '_S' in comment else 'RECOVERY_MAIN'
    return 'ENTRY'

def analyze(filepath, starting_balance=20000):
    """Build a multi-section profit/loss/exposure report for a trades CSV.

    Args:
        filepath: path to a trades CSV readable by load_trades().
        starting_balance: account starting balance (used for the header only).

    Returns:
        The complete report as a single newline-joined string.
    """
    trades = load_trades(filepath)
    report = []
    r = report.append

    r("=" * 80)
    r(f"  DEEP ANALYSIS — {os.path.basename(filepath)}")
    r(f"  {len(trades):,} trades | ${starting_balance:,.0f} starting")
    r("=" * 80)

    # ================================================================
    # SECTION 1: PROFIT SOURCE BREAKDOWN
    # ================================================================
    r("\n" + "=" * 80)
    r("  SECTION 1: WHERE DOES THE PROFIT COME FROM?")
    r("=" * 80)

    type_stats = defaultdict(lambda: {'count': 0, 'profit': 0, 'winners': 0, 'losers': 0,
                                       'gross_win': 0, 'gross_loss': 0, 'lots': 0})
    for t in trades:
        tt = get_type(t['comment'])
        ts = type_stats[tt]
        ts['count'] += 1
        ts['profit'] += t['profit']
        ts['lots'] += t['lots']
        if t['profit'] > 0:
            ts['winners'] += 1
            ts['gross_win'] += t['profit']
        else:
            # Zero-profit trades are counted as losers (breakevens are rare).
            ts['losers'] += 1
            ts['gross_loss'] += abs(t['profit'])

    total_profit = sum(ts['profit'] for ts in type_stats.values())
    r(f"\n{'Type':<18} {'Count':>7} {'Net Profit':>12} {'% of Total':>10} {'Win%':>6} {'Avg P&L':>10}")
    r("-" * 70)
    for tt in ['ENTRY', 'RECOVERY_MAIN', 'RECOVERY_SUB', 'SUB_REPEAT']:
        ts = type_stats[tt]
        pct = ts['profit'] / total_profit * 100 if total_profit else 0
        wr = ts['winners'] / ts['count'] * 100 if ts['count'] else 0
        avg = ts['profit'] / ts['count'] if ts['count'] else 0
        r(f"{tt:<18} {ts['count']:>7,} ${ts['profit']:>11,.0f} {pct:>9.1f}% {wr:>5.1f}% ${avg:>9.2f}")
    r(f"{'TOTAL':<18} {len(trades):>7,} ${total_profit:>11,.0f}")

    # ================================================================
    # SECTION 2: PROFIT BY RECOVERY LEVEL
    # ================================================================
    r("\n" + "=" * 80)
    r("  SECTION 2: PROFIT BY RECOVERY DEPTH")
    r("=" * 80)

    level_stats = defaultdict(lambda: {'count': 0, 'profit': 0, 'max_lot': 0, 'max_notional': 0,
                                        'threads': set()})
    for t in trades:
        lvl = get_level(t['comment'])
        ls = level_stats[lvl]
        ls['count'] += 1
        ls['profit'] += t['profit']
        ls['max_lot'] = max(ls['max_lot'], t['lots'])
        ls['max_notional'] = max(ls['max_notional'], t['lots'] * t['entry_price'])
        ls['threads'].add(t['thread'])

    r(f"\n{'Level':>6} {'Trades':>7} {'Threads':>8} {'Net Profit':>12} {'% of Total':>10} {'MaxLot':>8} {'MaxNotional':>13}")
    r("-" * 75)

    for lvl in sorted(level_stats.keys()):
        ls = level_stats[lvl]
        pct = ls['profit'] / total_profit * 100 if total_profit else 0
        lbl = 'Entry' if lvl == 0 else f'L{lvl}'
        r(f"{lbl:>6} {ls['count']:>7,} {len(ls['threads']):>8} ${ls['profit']:>11,.0f} {pct:>9.1f}% {ls['max_lot']:>8.3f} ${ls['max_notional']:>12,.0f}")

    # Group by depth ranges
    r(f"\nProfit by depth range:")
    ranges = [(0,0,'Entry only'), (1,3,'L1-L3 (shallow)'), (4,10,'L4-L10 (medium)'),
              (11,20,'L11-L20 (deep)'), (21,39,'L21-L39 (extreme)')]
    for lo, hi, label in ranges:
        in_range = [level_stats[l] for l in level_stats if lo <= l <= hi]
        rng_profit = sum(ls['profit'] for ls in in_range)
        rng_trades = sum(ls['count'] for ls in in_range)
        # set().union() with no args is the empty set, so no emptiness guard needed.
        rng_threads = len(set().union(*(ls['threads'] for ls in in_range)))
        pct = rng_profit / total_profit * 100 if total_profit else 0
        rng_max_lot = max((ls['max_lot'] for ls in in_range), default=0)
        rng_max_notional = max((ls['max_notional'] for ls in in_range), default=0)
        r(f"  {label:<25} {rng_trades:>7,} trades  ${rng_profit:>11,.0f} ({pct:>5.1f}%)  max lot {rng_max_lot:.3f}  max ${rng_max_notional:,.0f}")

    # ================================================================
    # SECTION 3: THREAD LIFECYCLE
    # ================================================================
    r("\n" + "=" * 80)
    r("  SECTION 3: THREAD LIFECYCLE")
    r("=" * 80)

    thread_data = defaultdict(lambda: {'trades': 0, 'profit': 0, 'max_level': 0,
                                        'max_lot': 0, 'first': '', 'last': '',
                                        'entry_profit': 0, 'recovery_profit': 0})
    for t in trades:
        td = thread_data[t['thread']]
        td['trades'] += 1
        td['profit'] += t['profit']
        td['max_level'] = max(td['max_level'], get_level(t['comment']))
        td['max_lot'] = max(td['max_lot'], t['lots'])
        if not td['first']: td['first'] = t['entry_time']
        td['last'] = t['exit_time']
        if get_type(t['comment']) == 'ENTRY':
            td['entry_profit'] += t['profit']
        else:
            td['recovery_profit'] += t['profit']

    # Thread stats by max depth
    depth_groups = defaultdict(lambda: {'count': 0, 'total_profit': 0, 'avg_trades': 0,
                                         'max_lot': 0})
    for td in thread_data.values():
        d = td['max_level']
        if d == 0: grp = 'Entry only'
        elif d <= 3: grp = 'Shallow (L1-3)'
        elif d <= 10: grp = 'Medium (L4-10)'
        elif d <= 20: grp = 'Deep (L11-20)'
        else: grp = 'Extreme (L21+)'
        dg = depth_groups[grp]
        dg['count'] += 1
        dg['total_profit'] += td['profit']
        dg['avg_trades'] += td['trades']  # summed here, divided at print time
        dg['max_lot'] = max(dg['max_lot'], td['max_lot'])

    r(f"\n{'Depth Group':<20} {'Threads':>8} {'Total Profit':>13} {'Avg Profit':>11} {'Avg Trades':>11} {'MaxLot':>8}")
    r("-" * 78)
    for grp in ['Entry only', 'Shallow (L1-3)', 'Medium (L4-10)', 'Deep (L11-20)', 'Extreme (L21+)']:
        dg = depth_groups[grp]
        if dg['count'] == 0: continue
        avg_p = dg['total_profit'] / dg['count']
        avg_t = dg['avg_trades'] / dg['count']
        r(f"{grp:<20} {dg['count']:>8,} ${dg['total_profit']:>12,.0f} ${avg_p:>10,.0f} {avg_t:>10.1f} {dg['max_lot']:>8.3f}")

    # Thread duration
    durations = []
    for td in thread_data.values():
        try:
            t1 = datetime.strptime(td['first'], '%Y-%m-%d %H:%M:%S')
            t2 = datetime.strptime(td['last'], '%Y-%m-%d %H:%M:%S')
            durations.append(((t2 - t1).total_seconds() / 3600, td['max_level'], td['profit']))
        except ValueError:
            pass  # skip threads with unparseable timestamps (best-effort)

    if durations:
        r(f"\nThread duration stats:")
        r(f"  Avg: {sum(d[0] for d in durations)/len(durations):.1f} hours")
        r(f"  Median: {sorted(d[0] for d in durations)[len(durations)//2]:.1f} hours")
        r(f"  Max: {max(d[0] for d in durations):.1f} hours")
        under_1h = sum(1 for d in durations if d[0] < 1)
        under_24h = sum(1 for d in durations if d[0] < 24)
        r(f"  Threads < 1 hour: {under_1h}/{len(durations)} ({under_1h/len(durations)*100:.0f}%)")
        r(f"  Threads < 24 hours: {under_24h}/{len(durations)} ({under_24h/len(durations)*100:.0f}%)")

    # ================================================================
    # SECTION 4: WHAT IF WE CAP RECOVERY DEPTH?
    # ================================================================
    r("\n" + "=" * 80)
    r("  SECTION 4: IMPACT OF RECOVERY DEPTH CAPS")
    r("=" * 80)

    r(f"\nWhat if we limited max recovery depth?")
    r(f"{'Cap':>6} {'Profit Lost':>13} {'% Lost':>8} {'Remaining':>13} {'Max Lot Saved':>14}")
    r("-" * 60)

    for cap in [3, 5, 7, 10, 15, 20, 25, 30]:
        lost = sum(level_stats[l]['profit'] for l in level_stats if l > cap)
        remaining = total_profit - lost
        pct_lost = lost / total_profit * 100 if total_profit else 0
        max_lot_at_cap = max((level_stats[l]['max_lot'] for l in level_stats if l <= cap), default=0)
        max_lot_above = max((level_stats[l]['max_lot'] for l in level_stats if l > cap), default=0)
        r(f"L{cap:>4} ${lost:>12,.0f} {pct_lost:>7.1f}% ${remaining:>12,.0f}  {max_lot_at_cap:.3f} (was {max_lot_above:.3f})")

    # ================================================================
    # SECTION 5: PEAK EXPOSURE ANALYSIS
    # ================================================================
    r("\n" + "=" * 80)
    r("  SECTION 5: PEAK EXPOSURE TIMELINE")
    r("=" * 80)

    events = []
    for t in trades:
        n = t['lots'] * t['entry_price']
        events.append(('o', t['entry_time'], n, t['lots'], t['thread'], get_level(t['comment'])))
        events.append(('c', t['exit_time'], -n, -t['lots'], t['thread'], get_level(t['comment'])))
    # Timestamps are '%Y-%m-%d %H:%M:%S' strings, so lexicographic order is
    # chronological; the stable sort keeps open-before-close for ties.
    events.sort(key=lambda x: x[1])

    # Track exposure over time, record top 10 peaks
    peaks = []
    running_n = 0
    running_l = 0
    open_threads = defaultdict(lambda: {'notional': 0, 'lots': 0, 'count': 0, 'max_level': 0})

    for etype, when, notional, lots, thread, level in events:
        running_n += notional
        running_l += lots
        ot = open_threads[thread]
        # BUG FIX: close events must reduce the per-thread position too.
        # Previously only opens were applied, so the "open" thread detail
        # at a peak included positions that had long since closed.
        ot['notional'] += notional
        ot['lots'] += lots
        if etype == 'o':
            ot['count'] += 1
            ot['max_level'] = max(ot['max_level'], level)
        else:
            ot['count'] -= 1

        # Record a new peak whenever exposure exceeds the current 10th-largest.
        if running_n > (peaks[-1]['notional'] if peaks else 0):
            peaks.append({
                'time': when, 'notional': running_n, 'lots': running_l,
                'num_threads': len([k for k in open_threads if open_threads[k]['count'] > 0]),
                # BUG FIX: snapshot the inner dicts as well — dict(open_threads)
                # shared the mutable per-thread dicts, so saved peak details
                # kept mutating after the peak was recorded.
                'thread_detail': {k: dict(v) for k, v in open_threads.items()}
            })
            peaks = sorted(peaks, key=lambda x: x['notional'], reverse=True)[:10]

    r(f"\nTop 5 peak exposure moments:")
    r(f"{'Time':<22} {'Notional':>14} {'Lots':>8} {'Threads':>8}")
    r("-" * 55)
    for p in peaks[:5]:
        r(f"{p['time']:<22} ${p['notional']:>13,.0f} {p['lots']:>8.1f} {p['num_threads']:>8}")

    # What's open at the #1 peak?
    if peaks:
        p1 = peaks[0]
        r(f"\nAt peak ({p1['time']}):")
        r(f"  Total notional: ${p1['notional']:,.0f}")
        r(f"  Total lots: {p1['lots']:.1f}")

        # Notional breakdown by depth
        peak_by_depth = defaultdict(float)
        for td in p1['thread_detail'].values():
            if td['count'] > 0:
                peak_by_depth[td['max_level']] += td['notional']

        r(f"  Notional by thread depth at peak:")
        for d in sorted(peak_by_depth.keys()):
            lbl = 'Entry' if d == 0 else f'L{d}'
            r(f"    {lbl}: ${peak_by_depth[d]:,.0f} ({peak_by_depth[d]/p1['notional']*100:.1f}%)")

    # ================================================================
    # SECTION 6: COMMISSION ANALYSIS
    # ================================================================
    r("\n" + "=" * 80)
    r("  SECTION 6: COMMISSION IMPACT")
    r("=" * 80)

    total_notional_traded = sum(t['lots'] * t['entry_price'] * 2 for t in trades)  # entry + exit
    # Loop-invariant: gross profit hoisted out of the fee loop; guarded so an
    # all-losing (or empty) trade set doesn't divide by zero.
    gross = sum(t['profit'] for t in trades if t['profit'] > 0)
    for fee_pct in [0.04, 0.025, 0.01, 0.00]:
        commission = total_notional_traded * (fee_pct / 100)
        # Rough estimate: current profit already includes whatever commission was in the sim
        # We want to show what profit would be at each fee level
        pct_of_gross = commission / gross * 100 if gross else 0
        r(f"  Fee {fee_pct:.3f}%: commission ~${commission:,.0f} | {pct_of_gross:.0f}% of gross profit")

    # ================================================================
    # SECTION 7: KEY FINDINGS & RECOMMENDATIONS
    # ================================================================
    r("\n" + "=" * 80)
    r("  SECTION 7: KEY FINDINGS")
    r("=" * 80)

    # Calculate key metrics. Denominators are guarded: an empty CSV yields
    # zero threads, and a break-even run yields zero total profit.
    n_threads = len(thread_data)
    entry_only_threads = sum(1 for td in thread_data.values() if td['max_level'] == 0)
    deep_threads = sum(1 for td in thread_data.values() if td['max_level'] > 10)
    extreme_threads = sum(1 for td in thread_data.values() if td['max_level'] > 20)
    deep_profit = sum(td['profit'] for td in thread_data.values() if td['max_level'] > 10)
    extreme_profit = sum(td['profit'] for td in thread_data.values() if td['max_level'] > 20)

    r(f"\n1. THREAD DISTRIBUTION:")
    r(f"   Entry-only threads: {entry_only_threads}/{n_threads} ({entry_only_threads/n_threads*100 if n_threads else 0:.0f}%)")
    r(f"   Deep threads (L10+): {deep_threads}/{n_threads} ({deep_threads/n_threads*100 if n_threads else 0:.1f}%)")
    r(f"   Extreme threads (L20+): {extreme_threads}/{n_threads} ({extreme_threads/n_threads*100 if n_threads else 0:.1f}%)")

    r(f"\n2. PROFIT CONCENTRATION:")
    r(f"   Deep thread profit (L10+): ${deep_profit:,.0f} ({deep_profit/total_profit*100 if total_profit else 0:.1f}% of total)")
    r(f"   Extreme thread profit (L20+): ${extreme_profit:,.0f} ({extreme_profit/total_profit*100 if total_profit else 0:.1f}% of total)")

    if peaks:
        r(f"\n3. PEAK EXPOSURE:")
        r(f"   Peak notional: ${peaks[0]['notional']:,.0f}")
        r(f"   At 50x, needs: ${peaks[0]['notional']/50:,.0f} account")
        cap10_max = max((level_stats[l]['max_notional'] for l in level_stats if l <= 10), default=0)
        r(f"   If capped at L10, max notional drops to: ~${cap10_max:,.0f}")
        r(f"   That needs: ${cap10_max/50:,.0f} at 50x")

    return "\n".join(report)


if __name__ == "__main__":
    # CLI: deep_analysis.py [trades_csv] [starting_balance]
    if len(sys.argv) > 1:
        filepath = sys.argv[1]
    else:
        import glob
        # Filenames embed a timestamp, so reverse-lexicographic order puts
        # the newest run first.
        candidates = sorted(glob.glob("results/trades_*.csv"), reverse=True)
        if not candidates:
            # Previously an IndexError — exit with a clear message instead.
            sys.exit("No results/trades_*.csv files found; pass a CSV path explicitly.")
        filepath = candidates[0]
    balance = float(sys.argv[2]) if len(sys.argv) > 2 else 20000
    result = analyze(filepath, balance)

    # Save report alongside the trade CSVs
    report_path = f"results/deep_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
    with open(report_path, 'w') as f:
        f.write(result)
    print(result)
    print(f"\nSaved: {report_path}")
