"""
Walk-Forward Backtest for XAUUSD_v6.py
======================================
Bar-by-bar M5 backtest that derives H1 from M5 candles (including the forming
H1 bar), computes the same indicators, simulates SL/TP, and retrains the
Logistic Regression model every 8 closed trades just like the live bot.

Usage:
------
python backtest_XAUUSD.py

Input : XAUUSD_M5.csv  (Time,Open,High,Low,Close,Volume)
Output: backtest_report.txt + trades_log.csv + console log
"""

import os
import warnings
from datetime import datetime
from typing import Dict, Any, Tuple, Optional

import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression

# Suppress every warning category for the whole process so the bar-by-bar
# console log stays readable.
warnings.filterwarnings("ignore")

# =============================================================================
# CONFIGURATION  (keep in sync with XAUUSD_v6.py)
# =============================================================================
# Input and output files are resolved relative to this script's directory.
_SCRIPT_DIR = os.path.dirname(__file__)
CSV_PATH = os.path.join(_SCRIPT_DIR, "XAUUSD_M5.csv")            # M5 candles to replay
REPORT_PATH = os.path.join(_SCRIPT_DIR, "backtest_report.txt")   # text report
TRADES_CSV = os.path.join(_SCRIPT_DIR, "trades_log.csv")         # one row per closed trade

SYMBOL = "XAUUSD"
EMA_PERIOD = 22        # span of the EMA used for the cross and the H1 trend
RSI_PERIOD = 14        # rolling window of the RSI
LOT_SIZE = 0.1         # position size used when converting price moves to P/L
RR_RATIO = 2.0         # take-profit distance = stop distance * RR_RATIO
SL_ATR_MULTI = 2.0     # stop distance = ATR * SL_ATR_MULTI
THRESHOLD = 0.60       # minimum model probability for a signal to be accepted
WARMUP_TRADES = 30     # completed trades required before the model is trained
RETRAIN_EVERY = 8      # retrain after this many additional closed trades
BETTING_START = 6      # first hour (inclusive) in which signals are evaluated
BETTING_END = 19       # hour (exclusive) at which signal evaluation stops

# Minimum history before we trust indicators
MIN_H1_BARS = 50
MIN_M5_BARS = 50

# =============================================================================
# INDICATORS  (identical logic to XAUUSD_v6.py)
# =============================================================================
def indicator_pack(df: pd.DataFrame) -> pd.DataFrame:
    """Attach ``ema``, ``atr``, ``adx`` and ``rsi`` columns to *df*.

    The frame is modified in place and also returned. It must provide
    ``close``, ``high`` and ``low`` columns; an empty frame is returned
    untouched.
    """
    if df.empty:
        return df

    close, high, low = df["close"], df["high"], df["low"]
    prev_close = close.shift()

    # --- EMA --------------------------------------------------------------
    df["ema"] = close.ewm(span=EMA_PERIOD, adjust=False).mean()

    # --- ATR(14): simple rolling mean of the true range -------------------
    true_range = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    ).max(axis=1)
    df["atr"] = true_range.rolling(14).mean()

    # --- ADX(14): rolling sums/means rather than Wilder smoothing ---------
    up_move = high.diff()
    down_move = -low.diff()
    plus_dm = pd.Series(
        np.where((up_move > down_move) & (up_move > 0), up_move, 0.),
        index=df.index,
    )
    minus_dm = pd.Series(
        np.where((down_move > up_move) & (down_move > 0), down_move, 0.),
        index=df.index,
    )
    tr_sum = true_range.rolling(14).sum()
    plus_di = 100 * plus_dm.rolling(14).sum() / tr_sum
    minus_di = 100 * minus_dm.rolling(14).sum() / tr_sum
    # A zero denominator yields inf/NaN; both are mapped to 0 before scaling.
    di_ratio = (plus_di - minus_di).abs() / (plus_di + minus_di)
    dx = di_ratio.replace([np.inf, -np.inf], 0).fillna(0) * 100
    df["adx"] = dx.rolling(14).mean()

    # --- RSI: simple rolling means of gains and losses --------------------
    change = close.diff()
    avg_gain = change.where(change > 0, 0).rolling(window=RSI_PERIOD).mean()
    avg_loss = (-change.where(change < 0, 0)).rolling(window=RSI_PERIOD).mean()
    relative_strength = avg_gain / avg_loss
    df["rsi"] = 100 - (100 / (1 + relative_strength))

    return df


# =============================================================================
# H1 BUILDER  (incremental, no look-ahead bias)
# =============================================================================
def build_h1_up_to(m5_df: pd.DataFrame, idx: int) -> pd.DataFrame:
    """
    Reconstruct the H1 chart as it would have appeared at M5 bar *idx*.

    Completed H1 bars are frozen; the current hour bar is built from all
    M5 bars seen so far in that hour. Only rows at positions ``0..idx`` are
    read, so no future bar can leak into the result.

    Parameters
    ----------
    m5_df : M5 candles sorted by ``time`` with columns
            ``time, open, high, low, close, volume``.
    idx   : zero-based *position* of the current M5 bar.

    Returns
    -------
    DataFrame with columns ``time, open, high, low, close, volume``: one row
    per completed hour that has data, followed by the forming hour.
    *m5_df* itself is never modified.
    """
    # Look the bar up by position so it always agrees with the positional
    # slice below, even when the frame does not carry a 0..n-1 index.
    current_hour = m5_df["time"].iloc[idx].floor("h")

    # The boolean selections below already produce new frames, so the
    # history does not need to be copied first.
    seen = m5_df.iloc[:idx + 1]
    parts = []

    # --- completed H1 bars (hours strictly before current_hour) ---
    completed = seen[seen["time"] < current_hour]
    if len(completed) > 0:
        parts.append(
            completed.resample("h", on="time")
            .agg({"open": "first", "high": "max", "low": "min",
                  "close": "last", "volume": "sum"})
            .dropna()          # hours without any M5 bar have NaN prices
            .reset_index()
        )

    # --- forming H1 bar (current hour, updated as new M5 bars arrive) ---
    forming = seen[seen["time"] >= current_hour]
    if len(forming) > 0:
        parts.append(pd.DataFrame([{
            "time": current_hour,
            "open": forming["open"].iloc[0],
            "high": forming["high"].max(),
            "low": forming["low"].min(),
            "close": forming["close"].iloc[-1],
            "volume": forming["volume"].sum(),
        }]))

    # Only concatenate real pieces: mixing in an empty placeholder frame is
    # deprecated in pandas and emits a warning.
    if not parts:
        return pd.DataFrame()
    return pd.concat(parts, ignore_index=True)


# =============================================================================
# SIGNAL LOGIC  (identical to XAUUSD_v6.py)
# =============================================================================
def get_trade_signal(entry_df: pd.DataFrame, trend_df: pd.DataFrame) -> Tuple[
    Optional[str], Dict[str, bool], Optional[pd.Series],
    Optional[pd.Series], Optional[float]
]:
    """Evaluate the five entry filters on the newest M5 bar.

    Returns ``(signal, filters, last_m5, trend_h1, atr_median)``.

    * ``signal`` is ``"BUY"``, ``"SELL"`` or ``None``.
    * ``filters`` is the filter dict of the side that fired; without a signal
      it is the side with more passing filters (the sell side on a tie).
    * When the inputs are unusable (empty frame, fewer than two entry bars,
      or NaN in the newest row) the result is ``(None, {}, None, None, None)``.
    """
    unusable = (None, {}, None, None, None)
    if entry_df.empty or trend_df.empty or len(entry_df) < 2:
        return unusable
    if entry_df[["ema", "atr", "adx", "rsi", "close"]].iloc[-1].isna().any():
        return unusable
    if trend_df[["ema", "close"]].iloc[-1].isna().any():
        return unusable

    prev_bar = entry_df.iloc[-2]
    last_bar = entry_df.iloc[-1]
    h1_bar = trend_df.iloc[-1]

    # ATR median over last 300 M5 bars (or all available)
    window = min(300, len(entry_df))
    atr_median = entry_df["atr"].iloc[-window:].median()

    rsi_floor, rsi_ceiling = 30, 70

    # These three conditions are direction-independent and shared by both sides.
    atr_ok = last_bar["atr"] > atr_median
    adx_ok = last_bar["adx"] > 20
    rsi_ok = rsi_floor < last_bar["rsi"] < rsi_ceiling

    long_side = {
        "crossed_up": prev_bar["close"] < prev_bar["ema"] and last_bar["close"] > last_bar["ema"],
        "trend_up": h1_bar["close"] > h1_bar["ema"],
        "atr_ok": atr_ok,
        "adx_ok": adx_ok,
        "rsi_ok": rsi_ok,
    }
    short_side = {
        "crossed_down": prev_bar["close"] > prev_bar["ema"] and last_bar["close"] < last_bar["ema"],
        "trend_down": h1_bar["close"] < h1_bar["ema"],
        "atr_ok": atr_ok,
        "adx_ok": adx_ok,
        "rsi_ok": rsi_ok,
    }

    if all(long_side.values()):
        return "BUY", long_side, last_bar, h1_bar, atr_median
    if all(short_side.values()):
        return "SELL", short_side, last_bar, h1_bar, atr_median

    # No signal: report whichever side came closer to firing.
    if sum(long_side.values()) > sum(short_side.values()):
        closest = long_side
    else:
        closest = short_side
    return None, closest, last_bar, h1_bar, atr_median


# =============================================================================
# FEATURE ENGINEERING  (identical to XAUUSD_v6.py)
# =============================================================================
def build_features(candle: pd.Series, trend_candle: pd.Series, atr_median: float) -> Dict[str, Any]:
    """Turn one M5 bar (with indicators) into a feature/bookkeeping record.

    *candle* must carry ``time, high, low, close, ema, atr, adx, rsi, volume``;
    *trend_candle* must carry ``close`` and ``ema``. The bookkeeping fields
    ``outcome``, ``entered`` and ``had_signal`` start at their "nothing
    happened yet" values.
    """
    bar_time = candle["time"]
    h1_above_ema = trend_candle["close"] > trend_candle["ema"]
    is_ranging = candle["adx"] < 20
    is_volatile = candle["atr"] > atr_median

    record: Dict[str, Any] = {}
    record["timestamp"] = int(bar_time.timestamp())
    record["hour"] = bar_time.hour
    record["candle_size"] = candle["high"] - candle["low"]
    record["ema_distance"] = abs(candle["close"] - candle["ema"])
    record["atr"] = candle["atr"]
    record["adx"] = candle["adx"]
    record["rsi"] = candle["rsi"]
    record["volume"] = candle["volume"]
    record["trend_above_ema"] = int(h1_above_ema)
    record["range_status"] = int(is_ranging)
    record["volatility_level"] = int(is_volatile)
    record["outcome"] = -1   # -1 = pending, 0 = loss, 1 = win
    record["entered"] = 0
    record["had_signal"] = 0
    return record


# Columns of the feature records that are fed to the model, in training order.
# The bookkeeping fields (timestamp, outcome, entered, had_signal) are left out.
FEATURE_COLS = [
    "hour",
    "candle_size",
    "ema_distance",
    "atr",
    "adx",
    "rsi",
    "volume",
    "trend_above_ema",
    "range_status",
    "volatility_level",
]


def train_model(df: pd.DataFrame) -> Optional[LogisticRegression]:
    """Train logistic regression on all *completed* trades (outcome 0 or 1).

    Parameters
    ----------
    df : feature log containing the ``FEATURE_COLS`` columns plus ``outcome``.

    Returns
    -------
    A fitted ``LogisticRegression``, or ``None`` when training is not possible
    yet: fewer than ``WARMUP_TRADES`` completed trades, or every completed
    trade has the same outcome.
    """
    # A log without an ``outcome`` column holds no completed trades at all.
    if "outcome" in df.columns:
        trades = df[df["outcome"].isin([0, 1])]
    else:
        trades = df.iloc[0:0]

    if len(trades) < WARMUP_TRADES:
        print(f"  [ML] Not enough completed trades ({len(trades)}) to train. Need {WARMUP_TRADES}.")
        return None

    X, y = trades[FEATURE_COLS], trades["outcome"]

    # The solver raises ValueError when it sees a single class (e.g. an
    # all-loss streak); skip training instead of aborting the whole backtest.
    if y.nunique() < 2:
        print("  [ML] Completed trades contain a single outcome class; cannot train yet.")
        return None

    return LogisticRegression(max_iter=500, class_weight="balanced").fit(X, y)


def predict_prob(model: LogisticRegression, row: pd.DataFrame) -> float:
    """Return the model's probability in column 1 for the first row of *row*.

    NOTE(review): column 1 is the "win" class only if the model was fitted
    on both outcomes 0 and 1 - confirm against the training data.
    """
    class_probabilities = model.predict_proba(row[FEATURE_COLS])
    return class_probabilities[0, 1]


# =============================================================================
# BACKTEST ENGINE
# =============================================================================
class Backtester:
    """Bar-by-bar walk-forward simulation of the M5 strategy.

    One position at most is open at any time. Every M5 bar is first used to
    monitor the open position's stop-loss / take-profit; bars inside the
    betting hours and past warmup are then scored for a new signal, logged
    as a feature record, optionally filtered by the model, and traded.

    Parameters
    ----------
    initial_equity : starting account balance (default 10000.0). It is the
        base for total return and the first peak for max drawdown.
    """

    # (label, filter_stats key) for the diagnostics table, in display order.
    _DIAG_ROWS = (
        ("BUY crossed_up", "buy_crossed_up"),
        ("BUY trend_up", "buy_trend_up"),
        ("BUY atr_ok", "buy_atr_ok"),
        ("BUY adx_ok", "buy_adx_ok"),
        ("BUY rsi_ok", "buy_rsi_ok"),
        ("BUY full signal", "buy_full"),
        ("SELL crossed_down", "sell_crossed_down"),
        ("SELL trend_down", "sell_trend_down"),
        ("SELL atr_ok", "sell_atr_ok"),
        ("SELL adx_ok", "sell_adx_ok"),
        ("SELL rsi_ok", "sell_rsi_ok"),
        ("SELL full signal", "sell_full"),
    )

    def __init__(self, initial_equity: float = 10000.0):
        # Kept apart from `equity` so the report can refer to the true base.
        self.initial_equity = initial_equity
        self.equity = initial_equity
        self.equity_curve: list[Dict[str, Any]] = []   # one point per closed trade
        self.trades: list[Dict[str, Any]] = []         # one record per closed trade
        self.features_df = pd.DataFrame()              # one row per scored bar
        self.model: Optional[LogisticRegression] = None
        self.open_trade: Optional[Dict[str, Any]] = None
        self.total_closed = 0
        self.signals_seen = 0
        self.trades_filtered = 0
        self.filter_stats = {
            "bars_checked": 0,
            "buy_crossed_up": 0, "buy_trend_up": 0, "buy_atr_ok": 0,
            "buy_adx_ok": 0, "buy_rsi_ok": 0,
            "sell_crossed_down": 0, "sell_trend_down": 0, "sell_atr_ok": 0,
            "sell_adx_ok": 0, "sell_rsi_ok": 0,
            "buy_full": 0, "sell_full": 0,
        }
        self.first_signal_logged = False

    # -------------------------------------------------------------------------
    def run(self):
        """Load the CSV, replay every M5 bar in time order, then report."""
        if not os.path.isfile(CSV_PATH):
            print(f"[ERROR] CSV not found: {CSV_PATH}")
            return

        df = pd.read_csv(CSV_PATH)
        # Normalise column names to match live-bot convention
        df.columns = [c.strip().lower() for c in df.columns]
        df["time"] = pd.to_datetime(df["time"])
        df = df.sort_values("time").reset_index(drop=True)

        if df.empty:
            print(f"[ERROR] CSV contains no bars: {CSV_PATH}")
            return

        print(f"[INFO] Loaded {len(df)} M5 bars  |  {df['time'].iloc[0]}  ->  {df['time'].iloc[-1]}\n")

        total_bars = len(df)
        for i in range(total_bars):
            if i > 0 and i % 10000 == 0:
                pct = i / total_bars * 100
                print(f"  ... processed {i:,} / {total_bars:,} bars ({pct:.0f}%)  |  "
                      f"Closed trades: {self.total_closed}  Equity: {self.equity:.2f}")
            bar = df.iloc[i]

            # === Monitor the open trade on EVERY bar ===
            # This must run before any `continue` below: SL/TP stay active
            # outside the betting hours, so a position carried overnight has
            # to be checked on those bars as well.
            self._check_close(bar)

            # === Betting hours (same as live bot) ===
            if not (BETTING_START <= bar["time"].hour < BETTING_END):
                continue

            # === Wait for indicator warmup ===
            # The cheap M5 check comes first so the H1 rebuild is skipped.
            if i < MIN_M5_BARS:
                continue
            h1_df = build_h1_up_to(df, i)
            if len(h1_df) < MIN_H1_BARS:
                continue

            # === Indicator calculation ===
            m5_window = indicator_pack(df.iloc[max(0, i - 299):i + 1].copy())
            # Match v6: only use last 300 H1 bars for trend indicators
            h1_df = indicator_pack(h1_df.iloc[-300:].copy())

            # === Generate signal ===
            signal, filters, last, trend_last, atr_median = get_trade_signal(m5_window, h1_df)
            if last is None:
                continue

            # === Log features for every bar ===
            feat = build_features(last, trend_last, atr_median)
            feat["had_signal"] = int(signal is not None)
            self.features_df = pd.concat(
                [self.features_df, pd.DataFrame([feat])], ignore_index=True
            )

            # get_trade_signal only returns a bar once the window holds at
            # least two rows, so the previous bar is always available here.
            self._record_filter_stats(i, bar, m5_window.iloc[-2], last,
                                      trend_last, atr_median, signal)

            if signal is None:
                continue
            self.signals_seen += 1

            # === Model filtering (walk-forward) ===
            accept = True
            if self.model is not None:
                prob = predict_prob(self.model, self.features_df.iloc[[-1]])
                accept = prob >= THRESHOLD
                if not accept:
                    self.trades_filtered += 1

            # === Execute if accepted and no open trade ===
            if accept and self.open_trade is None:
                self._open_trade(signal, bar, last, feat, filters)

        # -------------------------------------------------------------------
        # End-of-data: force-close any hanging trade at last known price
        # -------------------------------------------------------------------
        if self.open_trade is not None:
            print("\n[WARN] Open trade at end of data — forcing close at last bar.")
            final_bar = df.iloc[-1]
            self._close_trade(final_bar["close"], "END_OF_DATA", final_bar["time"])

        print(f"\n[INFO] Backtest complete. Processing final report...")
        self._generate_report()

    # -------------------------------------------------------------------------
    def _record_filter_stats(self, i: int, bar: pd.Series, prev: pd.Series,
                             last: pd.Series, trend_last: pd.Series,
                             atr_median: float, signal: Optional[str]):
        """Count which entry filters passed on this bar (diagnostics only).

        Both sides are recomputed here (same logic as get_trade_signal)
        because that function reports the filters of one side only. The first
        bar with a signal or with 4 of 5 filters passing is printed once.
        """
        self.filter_stats["bars_checked"] += 1

        buy_f = {
            "crossed_up": prev.close < prev.ema and last.close > last.ema,
            "trend_up": trend_last.close > trend_last.ema,
            "atr_ok": last.atr > atr_median,
            "adx_ok": last.adx > 20,
            "rsi_ok": 30 < last.rsi < 70,
        }
        sell_f = {
            "crossed_down": prev.close > prev.ema and last.close < last.ema,
            "trend_down": trend_last.close < trend_last.ema,
            "atr_ok": last.atr > atr_median,
            "adx_ok": last.adx > 20,
            "rsi_ok": 30 < last.rsi < 70,
        }
        for side, flags in (("buy", buy_f), ("sell", sell_f)):
            for name, passed in flags.items():
                if passed:
                    self.filter_stats[f"{side}_{name}"] += 1
            if all(flags.values()):
                self.filter_stats[f"{side}_full"] += 1

        # Log first time we get close (4/5 filters) or a full signal
        buy_score = sum(buy_f.values())
        sell_score = sum(sell_f.values())
        if not self.first_signal_logged and (buy_score >= 4 or sell_score >= 4 or signal is not None):
            print(f"\n[DIAG] First near-signal at bar {i} ({bar['time']}):")
            print(f"  M5 close={last.close:.3f}  EMA={last.ema:.3f}  ATR={last.atr:.3f}  ADX={last.adx:.1f}  RSI={last.rsi:.1f}")
            print(f"  H1 close={trend_last.close:.3f}  H1_EMA={trend_last.ema:.3f}")
            print(f"  BUY filters: {buy_score}/5  {buy_f}")
            print(f"  SELL filters: {sell_score}/5  {sell_f}")
            if signal:
                print(f"  --> SIGNAL: {signal}")
            self.first_signal_logged = True

    # -------------------------------------------------------------------------
    def _check_close(self, bar: pd.Series):
        """Close the open trade if this bar's range touched its SL or TP.

        The bar's high/low are used rather than its close, because a stop or
        target is hit as soon as price trades through the level inside the
        bar. When both levels lie inside the same bar the order of events is
        unknown, so the stop-loss is assumed to have been hit first. The fill
        is booked at the level itself.
        """
        if self.open_trade is None:
            return

        direction = self.open_trade["direction"]
        sl = self.open_trade["sl"]
        tp = self.open_trade["tp"]
        high, low = bar["high"], bar["low"]

        if direction == "BUY":
            hit_sl, hit_tp = low <= sl, high >= tp
        else:  # SELL
            hit_sl, hit_tp = high >= sl, low <= tp

        if hit_sl:
            self._close_trade(sl, "SL", bar["time"])
        elif hit_tp:
            self._close_trade(tp, "TP", bar["time"])

    # -------------------------------------------------------------------------
    def _open_trade(self, signal: str, bar: pd.Series, candle: pd.Series, feat: Dict, filters: Dict):
        """Open a position at this bar's close with ATR-based SL and TP.

        Also flags the newest feature row as ``entered`` so the outcome can
        later be attached to it.
        """
        entry = bar["close"]
        sl_pt = candle.atr * SL_ATR_MULTI
        tp_pt = sl_pt * RR_RATIO

        if signal == "BUY":
            sl = entry - sl_pt
            tp = entry + tp_pt
        else:
            sl = entry + sl_pt
            tp = entry - tp_pt

        self.open_trade = {
            "direction": signal,
            "entry_price": entry,
            "sl": sl,
            "tp": tp,
            "entry_time": bar["time"],
            "timestamp": feat["timestamp"],   # key used to find the feature row again
            "filters": filters,
        }
        # Mark entered in feature row
        self.features_df.at[self.features_df.index[-1], "entered"] = 1

        filter_str = " | ".join(f"{k}={'✓' if v else '✗'}" for k, v in filters.items())
        print(f"{bar['time']}  {signal}  entry={entry:.3f}  SL={sl:.3f}  TP={tp:.3f}  |  {filter_str}")

    # -------------------------------------------------------------------------
    def _close_trade(self, exit_price: float, reason: str, exit_time):
        """Book the open trade at *exit_price* and retrain when due.

        Updates equity, the equity curve, the trade log and the outcome of
        the feature row the trade was opened on. Does nothing when no trade
        is open.
        """
        trade = self.open_trade
        if trade is None:
            return
        direction = trade["direction"]
        entry = trade["entry_price"]

        # XAUUSD: 1 lot = 100 oz  =>  0.1 lot = 10 oz  =>  $1 move = $10 for 0.1 lot
        price_diff = exit_price - entry if direction == "BUY" else entry - exit_price
        profit = price_diff * 100 * LOT_SIZE

        # A flat (zero-profit) exit counts as a loss for the model.
        outcome = 1 if profit > 0 else 0

        # Update feature row with outcome
        mask = self.features_df["timestamp"] == trade["timestamp"]
        self.features_df.loc[mask, "outcome"] = outcome

        self.total_closed += 1
        self.equity += profit
        self.equity_curve.append({"time": exit_time, "equity": self.equity})
        # Serialize filter details for CSV/analysis
        filter_summary = {f"f_{k}": int(v) for k, v in trade.get("filters", {}).items()}
        self.trades.append({
            "direction": direction, "entry": entry, "exit": exit_price,
            "profit": profit, "reason": reason, "outcome": outcome,
            "entry_time": trade["entry_time"], "exit_time": exit_time,
            **filter_summary,
        })

        print(f"  -> CLOSE {reason}  P/L={profit:+.2f}  Equity={self.equity:.2f}")

        # === Retrain model every RETRAIN_EVERY closed trades ===
        if self.total_closed > 0 and self.total_closed % RETRAIN_EVERY == 0:
            print(f"  -> Retraining model after {self.total_closed} closed trades...")
            self.model = train_model(self.features_df)
            if self.model is not None:
                print(f"  -> Model retrained successfully.")

        self.open_trade = None

    # -------------------------------------------------------------------------
    def _diagnostic_lines(self, label_width: int) -> list:
        """Render the filter-diagnostics table, one string per row.

        Labels are padded or truncated to *label_width* characters; the
        console uses 16 and the report file 17.
        """
        s = self.filter_stats
        checked = max(1, s["bars_checked"])   # avoid division by zero
        return [
            f"  {label:<{label_width}.{label_width}}: {s[key]:>6,}  ({s[key]/checked*100:5.1f}%)"
            for label, key in self._DIAG_ROWS
        ]

    # -------------------------------------------------------------------------
    def _generate_report(self):
        """Print the summary and write the report file and the trades CSV.

        Filter diagnostics are always printed. The report file and the trades
        CSV are only written when at least one trade was closed.
        """
        print("\n" + "=" * 60)
        print("BACKTEST REPORT  –  XAUUSD M5 Walk-Forward")
        print("=" * 60)

        # --- filter diagnostics (always printed) ---
        checked = max(1, self.filter_stats["bars_checked"])
        diag_header = f"\n--- FILTER DIAGNOSTICS ({checked:,} bars checked after warmup) ---"
        print(diag_header)
        for line in self._diagnostic_lines(16):
            print(line)
        print()

        if not self.trades:
            print("No trades executed during backtest period.")
            return

        tdf = pd.DataFrame(self.trades)
        wins  = tdf[tdf["outcome"] == 1]
        loss  = tdf[tdf["outcome"] == 0]

        total       = len(tdf)
        win_rate    = len(wins) / total * 100
        gross_profit = wins["profit"].sum() if len(wins) else 0
        gross_loss   = abs(loss["profit"].sum()) if len(loss) else 0
        profit_factor = gross_profit / gross_loss if gross_loss else float("inf")
        avg_win     = wins["profit"].mean() if len(wins) else 0
        avg_loss    = loss["profit"].mean() if len(loss) else 0
        total_return = (self.equity - self.initial_equity) / self.initial_equity * 100

        # Max drawdown from equity curve. The curve only has points at trade
        # exits, so the starting balance is enforced as the first peak;
        # otherwise an opening losing streak would show no drawdown at all.
        eq = pd.DataFrame(self.equity_curve)
        eq["peak"] = eq["equity"].cummax().clip(lower=self.initial_equity)
        eq["dd_pct"] = (eq["equity"] - eq["peak"]) / eq["peak"] * 100
        max_dd = eq["dd_pct"].min()

        report = f"""
Bars processed : {len(self.features_df):,}
Signals seen   : {self.signals_seen}
Trades taken   : {total}
Trades filtered: {self.trades_filtered}

Wins           : {len(wins)}
Losses         : {len(loss)}
Win Rate       : {win_rate:.2f} %

Gross Profit   : {gross_profit:+.2f}
Gross Loss     : {-gross_loss:+.2f}
Profit Factor  : {profit_factor:.2f}
Avg Win        : {avg_win:+.2f}
Avg Loss       : {avg_loss:+.2f}

Start Equity   : {self.initial_equity:,.2f}
Final Equity   : {self.equity:.2f}
Total Return   : {total_return:+.2f} %
Max Drawdown   : {max_dd:.2f} %
"""
        print(report)
        print(f"Detailed report  : {REPORT_PATH}")
        print(f"Trades CSV       : {TRADES_CSV}")
        print("=" * 60)

        with open(REPORT_PATH, "w", encoding="utf-8") as f:
            f.write("BACKTEST REPORT\n")
            f.write("=" * 60 + "\n")
            f.write(diag_header + "\n")
            f.write("\n".join(self._diagnostic_lines(17)) + "\n\n")
            f.write(report)
            f.write("\n--- TRADE LOG ---\n")
            f.write(tdf.to_string(index=False))
            f.write("\n\n--- EQUITY CURVE ---\n")
            f.write(eq.to_string(index=False))

        tdf.to_csv(TRADES_CSV, index=False)


# =============================================================================
# ENTRY POINT
# =============================================================================
if __name__ == "__main__":
    # Script entry: run one full backtest with the default settings.
    Backtester().run()
