#!/usr/bin/env python3
"""
Scan BTCUSD M5 data to measure actual price excursions from trade entries.

For each continuation-pattern entry signal, tracks:
  - MAE: Max Adverse Excursion (how far price goes AGAINST the trade)
  - MFE: Max Favorable Excursion (how far price goes WITH the trade)
  - Time to TP hit (if it hits)
  - Number of grid levels that would be crossed at various step sizes

This tells us what grid distances and TPs are realistic for BTC M5.
"""

import sys, os
# Run relative to this script's own directory so the TICK_FILE path below
# resolves no matter where the script is invoked from.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

import pandas as pd
import numpy as np
from collections import defaultdict

TICK_FILE = "BTCUSD.csv"  # raw tick CSV with "Timestamp", "Bid price", "Ask price" columns
TIMEFRAME = "5min"  # pandas resample rule used to build M5 candles
FIBO_BARS_BACK = 13  # NOTE(review): not referenced anywhere in this script — confirm before removing
LOOKAHEAD_BARS = 500  # how many bars forward to track each entry


def load_candles(max_rows=None):
    """Read the raw tick CSV and resample the bid/ask midpoint into OHLC candles.

    max_rows: optional cap on the number of tick rows read (quick test runs).
    Returns a DataFrame indexed by datetime with open/high/low/close columns,
    rows with no ticks in the interval dropped.
    """
    kwargs = {"filepath_or_buffer": TICK_FILE}
    if max_rows:
        kwargs["nrows"] = max_rows
    ticks = pd.read_csv(**kwargs)

    # Timestamps end in ":mmm" milliseconds; swap that last colon for a dot
    # so the "%Y%m%d %H:%M:%S.%f" format can parse them.
    stamps = ticks["Timestamp"].str.replace(r":(\d{3})$", r".\1", regex=True)
    ticks["datetime"] = pd.to_datetime(stamps, format="%Y%m%d %H:%M:%S.%f")
    ticks["mid"] = (ticks["Bid price"] + ticks["Ask price"]) / 2.0

    candles = ticks.set_index("datetime")["mid"].resample(TIMEFRAME).ohlc()
    candles.columns = ["open", "high", "low", "close"]
    return candles.dropna()


def bar_direction(candle):
    """Classify a candle by its body: 0 = bull, 1 = bear, 2 = doji."""
    body_open, body_close = candle["open"], candle["close"]
    if body_close > body_open:
        return 0  # bull
    if body_close < body_open:
        return 1  # bear
    return 2  # doji (open == close)


def find_entries(candles):
    """Find continuation entries: two consecutive same-direction bars.

    Returns a list of (bar_index, direction, entry_price) tuples where
    direction is 0 for buy (two bull bars) / 1 for sell (two bear bars)
    and entry_price is the open of the bar following the two-bar pattern.
    """
    # Classify every bar once on numpy arrays instead of calling
    # bar_direction() through pandas .iloc per row inside the loop —
    # the per-row lookups dominate runtime on large datasets.
    opens = candles["open"].to_numpy()
    closes = candles["close"].to_numpy()
    # Same encoding as bar_direction: 0 = bull, 1 = bear, 2 = doji.
    dirs = np.where(closes > opens, 0, np.where(closes < opens, 1, 2))

    entries = []
    for i in range(2, len(candles)):
        d = dirs[i - 1]
        # Two consecutive non-doji bars in the same direction -> enter
        # in that direction at the next bar's open.
        if d == dirs[i - 2] and d != 2:
            entries.append((i, int(d), opens[i]))

    return entries


def analyze_excursions(candles, entries):
    """For each entry, track MAE/MFE over the LOOKAHEAD_BARS window.

    Params:
        candles: OHLC DataFrame (needs "high" and "low" columns).
        entries: list of (bar_index, direction, entry_price), direction
                 0 = buy, 1 = sell (as produced by find_entries).

    Returns a list of dicts per entry: mae/mfe in absolute dollars and as
    a percent of entry price, plus the bar offset at which each extreme
    first occurred.
    """
    n = len(candles)
    # Hoist columns to numpy once: per-bar .iloc lookups inside the
    # entry x lookahead double loop are the dominant cost on large data.
    highs = candles["high"].to_numpy()
    lows = candles["low"].to_numpy()

    results = []
    for idx, direction, entry_price in entries:
        end = min(idx + LOOKAHEAD_BARS, n)
        win_high = highs[idx:end]
        win_low = lows[idx:end]

        # Per-bar excursion against (adverse) and with (favorable) the trade.
        if direction == 0:  # buy
            adverse = entry_price - win_low
            favorable = win_high - entry_price
        else:  # sell
            adverse = win_high - entry_price
            favorable = entry_price - win_low

        # MAE/MFE start at 0 and only record positive excursions.
        # argmax returns the FIRST bar reaching the window maximum, which
        # matches a running strict-improvement (">") scan over the bars.
        mae = 0.0  # max adverse excursion (worst drawdown)
        mfe = 0.0  # max favorable excursion (best unrealized profit)
        bars_to_mae = 0
        bars_to_mfe = 0
        if adverse.size:
            peak = adverse.max()
            if peak > 0:
                mae = peak
                bars_to_mae = int(adverse.argmax())
            best = favorable.max()
            if best > 0:
                mfe = best
                bars_to_mfe = int(favorable.argmax())

        results.append({
            "idx": idx,
            "direction": "BUY" if direction == 0 else "SELL",
            "entry_price": entry_price,
            "mae": mae,
            "mfe": mfe,
            "mae_pct": mae / entry_price * 100,
            "mfe_pct": mfe / entry_price * 100,
            "bars_to_mae": bars_to_mae,
            "bars_to_mfe": bars_to_mfe,
        })

    return results


def count_grid_crossings(mae_values, step_sizes_usd):
    """Print, for each grid step size, distribution stats of how many grid
    levels each trade's MAE would have crossed (level N sits N*step from
    entry, so a MAE of m crosses int(m / step) levels)."""
    print("\n=== GRID LEVEL CROSSINGS (how many levels MAE would trigger) ===")
    print(f"{'Step $':>10} | {'Mean lvls':>10} | {'Med lvls':>10} | {'P75 lvls':>10} | {'P90 lvls':>10} | {'P95 lvls':>10} | {'Max lvls':>10}")
    print("-" * 85)

    maes = np.asarray(mae_values, dtype=float)
    for step in step_sizes_usd:
        # trunc matches int(mae / step) element-wise (truncation toward zero).
        levels = np.trunc(maes / step).astype(int)
        print(f"${step:>8,.0f} | {levels.mean():>10.1f} | {np.median(levels):>10.0f} | "
              f"{np.percentile(levels, 75):>10.0f} | {np.percentile(levels, 90):>10.0f} | "
              f"{np.percentile(levels, 95):>10.0f} | {levels.max():>10.0f}")


def find_tp_hit_rate(candles, entries, tp_distances_usd):
    """For each TP distance, print what % of entries hit TP within lookahead.

    Params:
        candles: OHLC DataFrame (needs "high", "low", "close" columns).
        entries: list of (bar_index, direction, entry_price), 0 = buy / 1 = sell.
        tp_distances_usd: iterable of take-profit distances in dollars.

    Prints one table row per TP distance; returns None.
    """
    print(f"\n=== TP HIT RATE (within {LOOKAHEAD_BARS} bars = {LOOKAHEAD_BARS*5/60:.0f}h) ===")
    print(f"{'TP $':>10} | {'Hit Rate':>10} | {'Avg bars':>10} | {'Med bars':>10} | {'TP as %':>10}")
    print("-" * 60)

    n = len(candles)
    avg_price = candles["close"].mean()
    # Hoist columns to numpy once: per-bar .iloc lookups in the inner loop
    # dominate runtime on large datasets.
    highs = candles["high"].to_numpy()
    lows = candles["low"].to_numpy()

    for tp_usd in tp_distances_usd:
        bars_to_hit = []

        for idx, direction, entry_price in entries:
            end = min(idx + LOOKAHEAD_BARS, n)
            if direction == 0:  # buy: TP sits above entry
                hit_offsets = np.flatnonzero(highs[idx:end] >= entry_price + tp_usd)
            else:  # sell: TP sits below entry
                hit_offsets = np.flatnonzero(lows[idx:end] <= entry_price - tp_usd)
            # First bar whose extreme reaches the TP, if any; entries that
            # never hit within the window contribute nothing.
            if hit_offsets.size:
                bars_to_hit.append(int(hit_offsets[0]))

        # One hit recorded per entry that reached TP, so the count is just
        # the list length (the previous `hit` flag was dead code).
        hits = len(bars_to_hit)
        hit_rate = hits / len(entries) * 100 if entries else 0
        avg_b = np.mean(bars_to_hit) if bars_to_hit else 0
        med_b = np.median(bars_to_hit) if bars_to_hit else 0

        print(f"${tp_usd:>8,.0f} | {hit_rate:>9.1f}% | {avg_b:>10.0f} | {med_b:>10.0f} | {tp_usd/avg_price*100:>9.2f}%")


def main():
    """Load candles, detect continuation entries, and print MAE/MFE stats,
    grid-crossing counts, TP hit rates, and suggested grid parameters."""
    # --- CLI: optional --max-rows=N caps how many tick rows are loaded ---
    max_rows = None
    for arg in sys.argv[1:]:
        if arg.startswith("--max-rows="):
            max_rows = int(arg.split("=")[1])

    print("Loading candles...")
    candles = load_candles(max_rows)
    print(f"Loaded {len(candles):,} M5 candles")
    print(f"Price range: ${candles['low'].min():,.0f} - ${candles['high'].max():,.0f}")
    avg_price = candles["close"].mean()
    print(f"Average price: ${avg_price:,.0f}")

    # Measure per-bar volatility
    candle_ranges = candles["high"] - candles["low"]
    print(f"\nPer-bar (M5) range: mean=${candle_ranges.mean():,.1f}, "
          f"median=${candle_ranges.median():,.1f}, "
          f"P95=${candle_ranges.quantile(0.95):,.1f}")

    # ATR-like: average true range over rolling windows
    # (rolling mean of plain high-low range — not Wilder's ATR with gaps)
    for window in [13, 20, 50]:
        atr = candle_ranges.rolling(window).mean()
        print(f"ATR({window}): mean=${atr.mean():,.1f}, current=${atr.iloc[-1]:,.1f}")

    print(f"\n{'='*70}")
    print("Finding continuation entries...")
    entries = find_entries(candles)
    print(f"Found {len(entries):,} entries ({len([e for e in entries if e[1]==0]):,} buys, "
          f"{len([e for e in entries if e[1]==1]):,} sells)")

    # Analyze excursions
    print("\nAnalyzing price excursions from each entry...")
    results = analyze_excursions(candles, entries)

    # Pull the per-entry metrics into flat arrays for percentile math below.
    maes = np.array([r["mae"] for r in results])
    mfes = np.array([r["mfe"] for r in results])
    mae_pcts = np.array([r["mae_pct"] for r in results])
    mfe_pcts = np.array([r["mfe_pct"] for r in results])

    print(f"\n=== MAX ADVERSE EXCURSION (drawdown from entry) ===")
    print(f"  Mean:   ${maes.mean():>10,.1f}  ({mae_pcts.mean():.2f}%)")
    print(f"  Median: ${np.median(maes):>10,.1f}  ({np.median(mae_pcts):.2f}%)")
    print(f"  P25:    ${np.percentile(maes, 25):>10,.1f}")
    print(f"  P50:    ${np.percentile(maes, 50):>10,.1f}")
    print(f"  P75:    ${np.percentile(maes, 75):>10,.1f}")
    print(f"  P90:    ${np.percentile(maes, 90):>10,.1f}")
    print(f"  P95:    ${np.percentile(maes, 95):>10,.1f}")
    print(f"  P99:    ${np.percentile(maes, 99):>10,.1f}")
    print(f"  Max:    ${maes.max():>10,.1f}")

    print(f"\n=== MAX FAVORABLE EXCURSION (profit potential) ===")
    print(f"  Mean:   ${mfes.mean():>10,.1f}  ({mfe_pcts.mean():.2f}%)")
    print(f"  Median: ${np.median(mfes):>10,.1f}  ({np.median(mfe_pcts):.2f}%)")
    print(f"  P25:    ${np.percentile(mfes, 25):>10,.1f}")
    print(f"  P50:    ${np.percentile(mfes, 50):>10,.1f}")
    print(f"  P75:    ${np.percentile(mfes, 75):>10,.1f}")
    print(f"  P90:    ${np.percentile(mfes, 90):>10,.1f}")
    print(f"  P95:    ${np.percentile(mfes, 95):>10,.1f}")
    print(f"  P99:    ${np.percentile(mfes, 99):>10,.1f}")
    print(f"  Max:    ${mfes.max():>10,.1f}")

    # MAE distribution histogram (text); `#` bar length = 1 char per percent
    print(f"\n=== MAE DISTRIBUTION ($ buckets) ===")
    buckets = [0, 500, 1000, 1500, 2000, 2500, 3000, 4000, 5000, 7500, 10000, 15000, 20000, 50000]
    for i in range(len(buckets) - 1):
        count = np.sum((maes >= buckets[i]) & (maes < buckets[i+1]))
        pct = count / len(maes) * 100
        bar = "#" * int(pct)
        print(f"  ${buckets[i]:>6,}-${buckets[i+1]:>6,}: {count:>5} ({pct:>5.1f}%) {bar}")
    # Open-ended top bucket for the tail beyond the last edge
    count = np.sum(maes >= buckets[-1])
    pct = count / len(maes) * 100
    print(f"  ${buckets[-1]:>6,}+:       {count:>5} ({pct:>5.1f}%)")

    # MFE distribution (same buckets as MAE)
    print(f"\n=== MFE DISTRIBUTION ($ buckets) ===")
    for i in range(len(buckets) - 1):
        count = np.sum((mfes >= buckets[i]) & (mfes < buckets[i+1]))
        pct = count / len(mfes) * 100
        bar = "#" * int(pct)
        print(f"  ${buckets[i]:>6,}-${buckets[i+1]:>6,}: {count:>5} ({pct:>5.1f}%) {bar}")
    count = np.sum(mfes >= buckets[-1])
    pct = count / len(mfes) * 100
    print(f"  ${buckets[-1]:>6,}+:       {count:>5} ({pct:>5.1f}%)")

    # Grid level crossing analysis
    step_sizes = [1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 6000, 7500, 10000]
    count_grid_crossings(maes, step_sizes)

    # TP hit rate analysis
    tp_distances = [500, 750, 1000, 1250, 1500, 2000, 2500, 3000, 4000, 5000]
    find_tp_hit_rate(candles, entries, tp_distances)

    # MFE/MAE ratio — edge quality; floor MAE at $1 to avoid division by zero
    ratios = mfes / np.maximum(maes, 1.0)
    print(f"\n=== MFE/MAE RATIO (edge quality, >1 = favorable) ===")
    print(f"  Mean: {ratios.mean():.2f}")
    print(f"  Median: {np.median(ratios):.2f}")
    print(f"  % entries with MFE > MAE: {np.sum(mfes > maes) / len(entries) * 100:.1f}%")

    # Bars to MAE/MFE
    bars_mae = np.array([r["bars_to_mae"] for r in results])
    bars_mfe = np.array([r["bars_to_mfe"] for r in results])
    print(f"\n=== TIMING ===")
    print(f"  Bars to max drawdown:  mean={bars_mae.mean():.0f}, median={np.median(bars_mae):.0f}")
    print(f"  Bars to max profit:    mean={bars_mfe.mean():.0f}, median={np.median(bars_mfe):.0f}")
    print(f"  (1 bar = 5 min, 12 bars = 1 hour, 288 bars = 1 day)")

    # Suggest optimal grid: set the first recovery level (L1) at a MAE
    # percentile so that fraction of trades never draws down far enough
    # to trigger recovery; set TP at a MFE percentile so that fraction
    # of trades reaches it.
    print(f"\n{'='*70}")
    print("=== SUGGESTED GRID PARAMETERS ===")
    print(f"\nBased on MAE percentiles (wider grid = fewer recovery triggers):")
    p50_mae = np.percentile(maes, 50)
    p75_mae = np.percentile(maes, 75)
    p90_mae = np.percentile(maes, 90)

    print(f"  If L1 = P50 MAE (${p50_mae:,.0f}): 50% of trades never trigger recovery")
    print(f"  If L1 = P75 MAE (${p75_mae:,.0f}): 75% of trades never trigger recovery")
    print(f"  If L1 = P90 MAE (${p90_mae:,.0f}): 90% of trades never trigger recovery")

    print(f"\nBased on MFE percentiles (TP should be hit by most trades):")
    p25_mfe = np.percentile(mfes, 25)
    p50_mfe = np.percentile(mfes, 50)
    p75_mfe = np.percentile(mfes, 75)
    print(f"  TP = P25 MFE (${p25_mfe:,.0f}): 75% of trades reach this profit")
    print(f"  TP = P50 MFE (${p50_mfe:,.0f}): 50% of trades reach this profit")
    print(f"  TP = P75 MFE (${p75_mfe:,.0f}): 25% of trades reach this profit")


# Script entry point: run the full analysis when executed directly.
if __name__ == "__main__":
    main()
