# stock_cursor_v0/backend/attribution_analysis.py

"""持仓归因分析深化 — 选股/择时能力、持仓时长、理由有效性分析。

功能：
1. 收益归因分解（选股 vs 择时 vs 运气）
2. 持仓时长分析（短线/中线/长线胜率）
3. 买入理由有效性验证
4. 情绪标签相关性分析
5. 对标指数超额收益拆解
"""
import datetime as dt
from collections import defaultdict, deque
from typing import Dict, List, Any, Tuple

import numpy as np
from sqlalchemy import select, and_

from db import get_session
from models import Trade, DailyQuote, IndexDaily, StockMetric


def analyze_attribution() -> Dict[str, Any]:
    """Run every attribution analysis over the full trade history.

    All trades are loaded ordered by ``(date, id)`` and handed to the five
    analysis functions of this module.

    Returns:
        ``{"ok": False, "msg": ...}`` when there are no trades at all;
        otherwise ``{"ok": True, ...}`` with one entry per analysis
        (``stock_vs_timing``, ``hold_period``, ``reason_validity``,
        ``emotion_correlation``, ``excess_return``).
    """
    with get_session() as session:
        history_query = select(Trade).order_by(Trade.date, Trade.id)
        all_trades = session.execute(history_query).scalars().all()

    if not all_trades:
        return {"ok": False, "msg": "暂无交易记录"}

    # Every analysis receives the same Trade objects, so the run order is
    # kept fixed.
    report: Dict[str, Any] = {"ok": True}
    report["stock_vs_timing"] = analyze_stock_vs_timing(all_trades)
    report["hold_period"] = analyze_hold_period(all_trades)
    report["reason_validity"] = analyze_reason_validity(all_trades)
    report["emotion_correlation"] = analyze_emotion_correlation(all_trades)
    report["excess_return"] = analyze_excess_return(all_trades)
    return report


def analyze_stock_vs_timing(trades: List[Trade]) -> Dict[str, Any]:
    """Separate stock-picking ability from timing ability, code by code.

    For every traded code the daily closes between its first and last trade
    date are loaded. Stock picking is proxied by the close-to-close return
    over that window. Timing is scored by where the average buy and average
    sell price sit inside the [lowest close, highest close] range of the
    same window: buying near the low and selling near the high score
    higher, and 50 is used for both sides when the range is flat.

    Only codes that have quote data and at least one ``"buy"`` and one
    ``"sell"`` trade contribute to the result.

    Args:
        trades: Trade records; ``code``, ``date``, ``side`` and ``price``
            are read.

    Returns:
        ``{"ok": False, "msg": ...}`` when no code qualifies; otherwise the
        average stock return (``stock_ability``), the average timing score
        (``timing_ability``), a fixed ``interpretation`` mapping and the
        per-code rows in ``by_stock``.
    """
    per_stock = []
    stock_return_sum = 0.0
    timing_score_sum = 0.0

    with get_session() as session:
        by_code = defaultdict(list)
        for trade in trades:
            by_code[trade.code].append(trade)

        for code, code_trades in by_code.items():
            trade_dates = [trade.date for trade in code_trades]
            window = and_(
                DailyQuote.code == code,
                DailyQuote.date >= min(trade_dates),
                DailyQuote.date <= max(trade_dates),
            )
            quote_query = (
                select(DailyQuote.date, DailyQuote.close)
                .where(window)
                .order_by(DailyQuote.date)
            )
            closes = {
                day: float(close)
                for day, close in session.execute(quote_query).all()
            }
            if len(closes) == 0:
                continue

            # Return of the stock itself from the first to the last quoted day.
            opening_close = closes[min(closes)]
            closing_close = closes[max(closes)]
            pct_change = (closing_close / opening_close - 1) * 100

            buy_prices = [tr.price for tr in code_trades if tr.side == "buy"]
            sell_prices = [tr.price for tr in code_trades if tr.side == "sell"]
            if not buy_prices or not sell_prices:
                continue

            mean_buy = np.mean(buy_prices)
            mean_sell = np.mean(sell_prices)
            low = min(closes.values())
            high = max(closes.values())

            if high > low:
                buy_score = (1 - (mean_buy - low) / (high - low)) * 100
                sell_score = ((mean_sell - low) / (high - low)) * 100
            else:
                # Flat price range: no position inside it can be measured.
                buy_score = 50
                sell_score = 50
            score = (buy_score + sell_score) / 2
            realized = (mean_sell / mean_buy - 1) * 100

            per_stock.append(
                {
                    "code": code,
                    "stock_return": round(pct_change, 2),
                    "timing_score": round(score, 1),
                    "actual_return": round(realized, 2),
                }
            )
            stock_return_sum += pct_change
            timing_score_sum += score

    if not per_stock:
        return {"ok": False, "msg": "数据不足"}

    sample_size = len(per_stock)
    return {
        "ok": True,
        "stock_ability": round(stock_return_sum / sample_size, 2),
        "timing_ability": round(timing_score_sum / sample_size, 1),
        "interpretation": {
            "stock": "正值表示选对了股票（股票整体上涨），负值表示选错了",
            "timing": "100分满分，表示买卖点的精准度，50分为平均水平",
        },
        "by_stock": per_stock,
    }


def analyze_hold_period(trades: List[Trade]) -> Dict[str, Any]:
    """Break closed positions down by how long they were held.

    Sells are matched first-in-first-out against earlier buys of the same
    code; each matched slice becomes one closed lot. Lots are bucketed as
    short (<= 5 days), mid (6-30 days) or long (> 30 days).

    Args:
        trades: Trade records in chronological order (FIFO matching relies
            on it). ``code``, ``side``, ``date``, ``price``, ``qty`` and
            ``fee`` are read; nothing is modified. Any ``side`` other than
            ``"buy"`` is treated as a sell.

    Returns:
        ``{"ok": False, "msg": ...}`` when no sell could be matched to a
        buy; otherwise ``{"ok": True, ...}`` with ``short_term``,
        ``mid_term`` and ``long_term`` statistics plus a ``recommendation``
        string. Every bucket, including an empty one, carries ``count``,
        ``win_rate``, ``avg_return``, ``avg_days`` and ``total_pnl``.
    """
    open_lots = defaultdict(deque)  # code -> FIFO queue of unsold buy lots
    closed_trades = []

    for t in trades:
        if t.side == "buy":
            open_lots[t.code].append({"trade": t, "qty": t.qty})
            continue

        lots = open_lots[t.code]
        remaining = t.qty
        while remaining > 0 and lots:
            lot = lots[0]
            buy = lot["trade"]
            sell_qty = min(remaining, lot["qty"])

            # The sell fee is pro-rated over the matched slice; the buy-side
            # fee is not part of this P&L figure.
            pnl = (t.price - buy.price) * sell_qty - t.fee * (sell_qty / t.qty)

            closed_trades.append(
                {
                    "code": t.code,
                    "buy_date": buy.date,
                    "sell_date": t.date,
                    "hold_days": (t.date - buy.date).days,
                    "buy_price": buy.price,
                    "sell_price": t.price,
                    "qty": sell_qty,
                    "pnl": pnl,
                    "pnl_pct": (t.price / buy.price - 1) * 100,
                }
            )

            lot["qty"] -= sell_qty
            remaining -= sell_qty
            if lot["qty"] <= 0:
                lots.popleft()

    if not closed_trades:
        return {"ok": False, "msg": "暂无已平仓交易"}

    short_term = [c for c in closed_trades if c["hold_days"] <= 5]
    mid_term = [c for c in closed_trades if 6 <= c["hold_days"] <= 30]
    long_term = [c for c in closed_trades if c["hold_days"] > 30]

    def calc_stats(bucket):
        """Summarise one bucket; empty buckets keep the same key set."""
        if not bucket:
            return {
                "count": 0,
                "win_rate": 0,
                "avg_return": 0,
                "avg_days": 0,
                "total_pnl": 0,
            }
        wins = sum(1 for c in bucket if c["pnl"] > 0)
        return {
            "count": len(bucket),
            "win_rate": round(wins / len(bucket) * 100, 1),
            "avg_return": round(np.mean([c["pnl_pct"] for c in bucket]), 2),
            "avg_days": round(np.mean([c["hold_days"] for c in bucket]), 1),
            "total_pnl": round(sum(c["pnl"] for c in bucket), 2),
        }

    return {
        "ok": True,
        "short_term": calc_stats(short_term),
        "mid_term": calc_stats(mid_term),
        "long_term": calc_stats(long_term),
        "recommendation": _recommend_hold_period(short_term, mid_term, long_term),
    }


def _recommend_hold_period(short, mid, long) -> str:
    """Name the holding period with the best win-rate/return score.

    Each non-empty period is scored as ``win_rate * 50 + avg_return / 10 * 50``
    (``win_rate`` as a 0-1 fraction, ``avg_return`` in percent). Periods with
    no closed lots are left out of the ranking, so a period without any data
    can never beat one whose score is negative.

    Args:
        short: Closed lots held <= 5 days (dicts with ``pnl``, ``pnl_pct``).
        mid: Closed lots held 6-30 days.
        long: Closed lots held > 30 days.

    Returns:
        A recommendation sentence, or ``"数据不足"`` when all periods are empty.
    """
    periods = [
        ("短线（≤5天）", short),
        ("中线（6-30天）", mid),
        ("长线（>30天）", long),
    ]

    scored = []
    for name, bucket in periods:
        if not bucket:
            continue
        wins = sum(1 for c in bucket if c["pnl"] > 0)
        win_rate = wins / len(bucket)
        avg_ret = np.mean([c["pnl_pct"] for c in bucket])
        scored.append((name, win_rate * 50 + (avg_ret / 10) * 50))

    if not scored:
        return "数据不足"

    # max() keeps the earliest period on ties.
    best = max(scored, key=lambda item: item[1])[0]

    return f"建议重点关注{best}，该周期胜率和收益表现最佳"


def analyze_reason_validity(trades: List[Trade]) -> Dict[str, Any]:
    """Check how well each recorded buy reason worked out.

    Sells are matched first-in-first-out against earlier buys of the same
    code, and the profit of every matched slice is credited to the
    ``reason`` of the buy it closes (``"未标注"`` when the buy has none).

    Remaining quantities are tracked locally: the ``qty`` of the passed-in
    trades is never changed, so the same list can be analysed again
    afterwards and gives the same answer.

    Args:
        trades: Trade records in chronological order. ``code``, ``side``,
            ``price``, ``qty`` and, for buys, ``reason`` are read. Any
            ``side`` other than ``"buy"`` is treated as a sell.

    Returns:
        ``{"ok": False, "msg": ...}`` when no sell could be matched to a
        buy; otherwise ``{"ok": True, ...}`` with ``by_reason`` (rows sorted
        by win rate, highest first), ``best_reason`` and ``worst_reason``.
    """
    # code -> FIFO queue of [buy_trade, unsold_qty]
    open_lots = defaultdict(deque)
    reason_stats = defaultdict(lambda: {"trades": [], "wins": 0, "total_pnl": 0})

    for t in trades:
        if t.side == "buy":
            open_lots[t.code].append([t, t.qty])
            continue

        lots = open_lots[t.code]
        remaining = t.qty
        while remaining > 0 and lots:
            lot = lots[0]
            buy = lot[0]
            qty = min(remaining, lot[1])

            pnl = (t.price - buy.price) * qty
            pnl_pct = (t.price / buy.price - 1) * 100

            stats = reason_stats[buy.reason or "未标注"]
            stats["trades"].append(pnl_pct)
            stats["total_pnl"] += pnl
            if pnl > 0:
                stats["wins"] += 1

            lot[1] -= qty
            remaining -= qty
            if lot[1] <= 0:
                lots.popleft()

    if not reason_stats:
        return {"ok": False, "msg": "暂无已平仓交易"}

    results = []
    for reason, stats in reason_stats.items():
        returns = stats["trades"]
        win_ratio = stats["wins"] / len(returns)
        results.append(
            {
                "reason": reason,
                "count": len(returns),
                "win_rate": round(win_ratio * 100, 1),
                "avg_return": round(np.mean(returns), 2),
                "total_pnl": round(stats["total_pnl"], 2),
                # A reason counts as effective when it wins more than half
                # of the time.
                "effectiveness": "有效" if win_ratio > 0.5 else "无效",
            }
        )

    results.sort(key=lambda row: row["win_rate"], reverse=True)

    return {
        "ok": True,
        "by_reason": results,
        "best_reason": results[0]["reason"] if results else None,
        "worst_reason": results[-1]["reason"] if results else None,
    }


def analyze_emotion_correlation(trades: List[Trade]) -> Dict[str, Any]:
    """Relate the emotion tag recorded on each buy to how the trade ended.

    Sells are matched first-in-first-out against earlier buys of the same
    code, and the profit of every matched slice is credited to the
    ``emotion`` of the buy it closes (``"未标注"`` when the buy has none).

    Remaining quantities are tracked locally: the ``qty`` of the passed-in
    trades is never changed, so the same list can be analysed again
    afterwards and gives the same answer.

    Args:
        trades: Trade records in chronological order. ``code``, ``side``,
            ``price``, ``qty`` and, for buys, ``emotion`` are read. Any
            ``side`` other than ``"buy"`` is treated as a sell.

    Returns:
        ``{"ok": False, "msg": ...}`` when no sell could be matched to a
        buy; otherwise ``{"ok": True, ...}`` with ``by_emotion`` (rows
        sorted by average return, highest first) and an ``advice`` text.
    """
    # code -> FIFO queue of [buy_trade, unsold_qty]
    open_lots = defaultdict(deque)
    emotion_stats = defaultdict(lambda: {"trades": [], "wins": 0, "total_pnl": 0})

    for t in trades:
        if t.side == "buy":
            open_lots[t.code].append([t, t.qty])
            continue

        lots = open_lots[t.code]
        remaining = t.qty
        while remaining > 0 and lots:
            lot = lots[0]
            buy = lot[0]
            qty = min(remaining, lot[1])

            pnl = (t.price - buy.price) * qty
            pnl_pct = (t.price / buy.price - 1) * 100

            stats = emotion_stats[buy.emotion or "未标注"]
            stats["trades"].append(pnl_pct)
            stats["total_pnl"] += pnl
            if pnl > 0:
                stats["wins"] += 1

            lot[1] -= qty
            remaining -= qty
            if lot[1] <= 0:
                lots.popleft()

    if not emotion_stats:
        return {"ok": False, "msg": "暂无已平仓交易"}

    results = []
    for emotion, stats in emotion_stats.items():
        returns = stats["trades"]
        results.append(
            {
                "emotion": emotion,
                "count": len(returns),
                "win_rate": round(stats["wins"] / len(returns) * 100, 1),
                "avg_return": round(np.mean(returns), 2),
                "total_pnl": round(stats["total_pnl"], 2),
            }
        )

    results.sort(key=lambda row: row["avg_return"], reverse=True)

    return {
        "ok": True,
        "by_emotion": results,
        "advice": _generate_emotion_advice(results),
    }


def _generate_emotion_advice(results: List[Dict]) -> str:
    """Build the advice text from per-emotion rows.

    Args:
        results: Rows with ``emotion``, ``win_rate`` and ``avg_return``,
            already ordered best first; the first row is reported as the
            best emotional state and the last row as the worst.

    Returns:
        A three-part text (best state, worst state, fixed closing advice),
        or ``"数据不足"`` when ``results`` is empty.
    """
    if not results:
        return "数据不足"

    best = results[0]
    worst = results[-1]

    advice = (
        f"最佳情绪状态：{best['emotion']}（胜率{best['win_rate']}%，"
        f"平均收益{best['avg_return']}%）\n"
    )
    advice += (
        f"最差情绪状态：{worst['emotion']}（胜率{worst['win_rate']}%，"
        f"平均收益{worst['avg_return']}%）\n"
    )
    advice += "\n建议：保持理性和纪律，避免在贪婪或恐慌时做决策"

    return advice


def analyze_excess_return(
    trades: List[Trade], benchmark_code: str = "sh000300"
) -> Dict[str, Any]:
    """Compare the portfolio's realized return with a benchmark index.

    The benchmark return is the change between the first and last index
    close found between the earliest and latest trade date. The portfolio
    return is realized P&L (average-cost method, sell fee deducted) divided
    by the total amount spent on buys including fees.

    Args:
        trades: Trade records in chronological order. ``code``, ``side``,
            ``date``, ``price``, ``qty`` and ``fee`` are read. Any ``side``
            other than ``"buy"`` is treated as a sell.
        benchmark_code: ``IndexDaily.code`` of the index to compare against.
            Defaults to ``"sh000300"``, the value that used to be hard-coded.

    Returns:
        ``{"ok": False, "msg": ...}`` when there are no trades or no index
        rows for the period; otherwise ``{"ok": True, ...}`` with
        ``portfolio_return``, ``index_return`` and ``excess_return`` (all in
        percent), a ``source`` label, an ``interpretation`` text and the
        analysed ``period``.
    """
    if not trades:
        return {"ok": False, "msg": "暂无交易记录"}

    start_date = min(t.date for t in trades)
    end_date = max(t.date for t in trades)

    with get_session() as s:
        index_data = s.execute(
            select(IndexDaily.date, IndexDaily.close)
            .where(
                and_(
                    IndexDaily.code == benchmark_code,
                    IndexDaily.date >= start_date,
                    IndexDaily.date <= end_date,
                )
            )
            .order_by(IndexDaily.date)
        ).all()

    if not index_data:
        return {"ok": False, "msg": "缺少指数数据"}

    index_start = float(index_data[0][1])
    index_end = float(index_data[-1][1])
    index_return = (index_end / index_start - 1) * 100

    # code -> running position: shares held and their total cost basis
    holdings = defaultdict(lambda: {"qty": 0, "cost": 0.0})
    realized_pnl = 0.0
    total_cost = 0.0

    for t in trades:
        p = holdings[t.code]
        if t.side == "buy":
            outlay = t.price * t.qty + t.fee
            p["cost"] += outlay
            p["qty"] += t.qty
            total_cost += outlay
        elif p["qty"] > 0:
            # Sells are booked against the average cost of the open
            # position; a sell with no open position is ignored.
            avg = p["cost"] / p["qty"]
            qty = min(t.qty, p["qty"])
            realized_pnl += (t.price - avg) * qty - t.fee
            p["cost"] -= avg * qty
            p["qty"] -= qty

    portfolio_return = (realized_pnl / total_cost * 100) if total_cost > 0 else 0
    excess_return = portfolio_return - index_return

    if excess_return > 0:
        source = "选股能力贡献"
        interpretation = "组合表现优于大盘，说明选股和择时能力较好"
    elif excess_return < -5:
        # More than 5 percentage points behind the benchmark.
        source = "选股/择时失误"
        interpretation = "组合表现明显弱于大盘，建议反思选股逻辑和买卖时机"
    else:
        source = "与大盘持平"
        interpretation = "组合表现与大盘接近，可考虑增强选股策略"

    return {
        "ok": True,
        "portfolio_return": round(portfolio_return, 2),
        "index_return": round(index_return, 2),
        "excess_return": round(excess_return, 2),
        "source": source,
        "interpretation": interpretation,
        "period": f"{start_date} ~ {end_date}",
    }