Files
stock_cursor_v0/backend/attribution_analysis.py
2026-06-14 11:54:45 +08:00

438 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""持仓归因分析深化 — 选股/择时能力、持仓时长、理由有效性分析。
功能:
1. 收益归因分解(选股 vs 择时 vs 运气)
2. 持仓时长分析(短线/中线/长线胜率)
3. 买入理由有效性验证
4. 情绪标签相关性分析
5. 对标指数超额收益拆解
"""
import datetime as dt
from collections import defaultdict, deque
from typing import Dict, List, Any, Tuple

import numpy as np
from sqlalchemy import select, and_

from db import get_session
from models import Trade, DailyQuote, IndexDaily, StockMetric
def analyze_attribution() -> Dict[str, Any]:
    """Run every attribution sub-analysis over the full trade history.

    All trades are loaded ordered by date and then id, and each sub-analysis
    result is stored under its own key of the returned report.

    Returns:
        ``{"ok": False, "msg": ...}`` when there are no trades; otherwise a
        dict with ``ok=True`` and one entry per sub-analysis.
    """
    with get_session() as session:
        query = select(Trade).order_by(Trade.date, Trade.id)
        all_trades = session.execute(query).scalars().all()
        if not all_trades:
            return {"ok": False, "msg": "暂无交易记录"}

        # The sub-analyses run in this fixed order on the same trade list.
        report: Dict[str, Any] = {"ok": True}
        report["stock_vs_timing"] = analyze_stock_vs_timing(all_trades)
        report["hold_period"] = analyze_hold_period(all_trades)
        report["reason_validity"] = analyze_reason_validity(all_trades)
        report["emotion_correlation"] = analyze_emotion_correlation(all_trades)
        report["excess_return"] = analyze_excess_return(all_trades)
        return report
def analyze_stock_vs_timing(trades: List[Trade]) -> Dict[str, Any]:
    """Split performance into a stock-selection part and a timing part.

    For every stock code the daily closes between its first and last trade
    date are loaded, then:

    * Stock selection: percentage change from the first to the last close in
      that window.
    * Timing: where the mean buy price and the mean sell price sit inside the
      [lowest close, highest close] range, averaged into a single score
      (50 is used for both halves when the range is flat).

    Only stocks that have quotes in the window and at least one buy and one
    sell are reported. No separate "luck" component is computed here.

    Args:
        trades: Trade records; ``code``, ``date``, ``side`` and ``price`` are
            read.

    Returns:
        ``{"ok": False, "msg": ...}`` when no stock qualifies; otherwise the
        averaged abilities, an interpretation text and per-stock details.
    """
    with get_session() as session:
        by_code = defaultdict(list)
        for trade in trades:
            by_code[trade.code].append(trade)

        per_stock = []
        stock_sum = 0.0
        timing_sum = 0.0
        for code, code_trades in by_code.items():
            trade_dates = [trade.date for trade in code_trades]
            window_start, window_end = min(trade_dates), max(trade_dates)

            quote_query = (
                select(DailyQuote.date, DailyQuote.close)
                .where(
                    and_(
                        DailyQuote.code == code,
                        DailyQuote.date >= window_start,
                        DailyQuote.date <= window_end,
                    )
                )
                .order_by(DailyQuote.date)
            )
            closes = {
                day: float(close)
                for day, close in session.execute(quote_query).all()
            }
            if not closes:
                continue

            # Buy-and-hold return of the stock over the traded window.
            opening_close = closes[min(closes)]
            final_close = closes[max(closes)]
            stock_return = (final_close / opening_close - 1) * 100

            buy_prices = [tr.price for tr in code_trades if tr.side == "buy"]
            sell_prices = [tr.price for tr in code_trades if tr.side == "sell"]
            if not (buy_prices and sell_prices):
                continue

            mean_buy = np.mean(buy_prices)
            mean_sell = np.mean(sell_prices)
            lowest = min(closes.values())
            highest = max(closes.values())
            if highest > lowest:
                # Buying near the low and selling near the high both score high.
                buy_timing = (1 - (mean_buy - lowest) / (highest - lowest)) * 100
                sell_timing = ((mean_sell - lowest) / (highest - lowest)) * 100
            else:
                buy_timing = sell_timing = 50
            timing_score = (buy_timing + sell_timing) / 2
            actual_return = (mean_sell / mean_buy - 1) * 100

            per_stock.append(
                {
                    "code": code,
                    "stock_return": round(stock_return, 2),
                    "timing_score": round(timing_score, 1),
                    "actual_return": round(actual_return, 2),
                }
            )
            stock_sum += stock_return
            timing_sum += timing_score

        if not per_stock:
            return {"ok": False, "msg": "数据不足"}

        return {
            "ok": True,
            "stock_ability": round(stock_sum / len(per_stock), 2),
            "timing_ability": round(timing_sum / len(per_stock), 1),
            "interpretation": {
                "stock": "正值表示选对了股票(股票整体上涨),负值表示选错了",
                "timing": "100分满分表示买卖点的精准度50分为平均水平",
            },
            "by_stock": per_stock,
        }
def analyze_hold_period(trades: List[Trade]) -> Dict[str, Any]:
    """Break closed round trips down by holding period.

    Sells are matched against earlier buys of the same code first-in,
    first-out. Every matched slice becomes one closed trade and is bucketed:

    * short term: held <= 5 days
    * mid term: held 6-30 days
    * long term: held > 30 days

    The input records are only read; remaining lot sizes are tracked locally.

    Args:
        trades: Trade records; the matching is order-dependent, so they are
            expected oldest first. ``side``, ``code``, ``qty``, ``price``,
            ``date`` and ``fee`` are read. Any side other than ``"buy"`` is
            treated as a sell.

    Returns:
        ``{"ok": False, "msg": ...}`` when nothing has been closed; otherwise
        per-bucket statistics plus a recommendation string.
    """
    # code -> FIFO queue of [buy trade, remaining quantity]
    open_lots = defaultdict(deque)
    closed_trades = []
    for t in trades:
        if t.side == "buy":
            open_lots[t.code].append([t, t.qty])
            continue

        lots = open_lots[t.code]
        sell_fee = t.fee or 0.0  # a missing fee is treated as zero
        remaining = t.qty
        while remaining > 0 and lots:
            lot = lots[0]
            buy, lot_qty = lot
            sell_qty = min(remaining, lot_qty)
            # Only the sell-side fee is charged, pro rata to the matched slice.
            pnl = (t.price - buy.price) * sell_qty - sell_fee * (sell_qty / t.qty)
            closed_trades.append(
                {
                    "code": t.code,
                    "buy_date": buy.date,
                    "sell_date": t.date,
                    "hold_days": (t.date - buy.date).days,
                    "buy_price": buy.price,
                    "sell_price": t.price,
                    "qty": sell_qty,
                    "pnl": pnl,
                    "pnl_pct": (t.price / buy.price - 1) * 100,
                }
            )
            lot[1] = lot_qty - sell_qty
            remaining -= sell_qty
            if lot[1] <= 0:
                lots.popleft()

    if not closed_trades:
        return {"ok": False, "msg": "暂无已平仓交易"}

    short_term = [c for c in closed_trades if c["hold_days"] <= 5]
    mid_term = [c for c in closed_trades if 6 <= c["hold_days"] <= 30]
    long_term = [c for c in closed_trades if c["hold_days"] > 30]

    return {
        "ok": True,
        "short_term": _hold_period_stats(short_term),
        "mid_term": _hold_period_stats(mid_term),
        "long_term": _hold_period_stats(long_term),
        "recommendation": _recommend_hold_period(short_term, mid_term, long_term),
    }


def _hold_period_stats(closed: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Summarise one bucket of closed trades (count, win rate, averages, PnL).

    An empty bucket yields zeros for every field, including ``total_pnl``, so
    callers always see the same set of keys.
    """
    if not closed:
        return {
            "count": 0,
            "win_rate": 0,
            "avg_return": 0,
            "avg_days": 0,
            "total_pnl": 0,
        }
    wins = sum(1 for c in closed if c["pnl"] > 0)
    return {
        "count": len(closed),
        "win_rate": round(wins / len(closed) * 100, 1),
        "avg_return": round(np.mean([c["pnl_pct"] for c in closed]), 2),
        "avg_days": round(np.mean([c["hold_days"] for c in closed]), 1),
        "total_pnl": round(sum(c["pnl"] for c in closed), 2),
    }


def _recommend_hold_period(short, mid, long) -> str:
    """Name the holding-period bucket with the best combined score.

    The score is ``win_rate * 50 + (average pnl_pct / 10) * 50``. Buckets
    without closed trades are not candidates, so an empty bucket can never be
    recommended over one that has data (even a losing one). Ties go to the
    shorter period.

    Args:
        short: Closed trades held <= 5 days.
        mid: Closed trades held 6-30 days.
        long: Closed trades held > 30 days.

    Returns:
        A recommendation sentence, or ``"数据不足"`` when all buckets are empty.
    """
    # NOTE(review): the third label has an unbalanced "(" - confirm the
    # intended display text before changing it.
    labelled = (
        ("短线≤5天", short),
        ("中线6-30天", mid),
        ("长线(>30天", long),
    )
    candidates = [(label, bucket) for label, bucket in labelled if bucket]
    if not candidates:
        return "数据不足"

    def score(bucket) -> float:
        wins = sum(1 for c in bucket if c["pnl"] > 0)
        win_rate = wins / len(bucket)
        avg_ret = np.mean([c["pnl_pct"] for c in bucket])
        return win_rate * 50 + (avg_ret / 10) * 50

    # max() keeps the first of equally scored candidates.
    best, _ = max(candidates, key=lambda item: score(item[1]))
    return f"建议重点关注{best},该周期胜率和收益表现最佳"
def analyze_reason_validity(trades: List[Trade]) -> Dict[str, Any]:
    """Measure how well each recorded buy reason has worked out.

    Sells are matched against earlier buys of the same code first-in,
    first-out; every matched slice is attributed to the reason recorded on
    the buy (``"未标注"`` when the buy has none).

    The trade records are never modified: remaining lot sizes are tracked in
    local state, so calling this function does not change what later analyses
    see and does not alter the ``qty`` of the passed objects.

    Args:
        trades: Trade records; the matching is order-dependent, so they are
            expected oldest first. ``side``, ``code``, ``qty``, ``price`` and
            ``reason`` are read. Any side other than ``"buy"`` is treated as
            a sell.

    Returns:
        ``{"ok": False, "msg": ...}`` when no sell could be matched; otherwise
        per-reason statistics sorted by win rate (highest first) together
        with the best and worst reason.
    """
    # code -> FIFO queue of [buy trade, remaining quantity]
    open_lots = defaultdict(deque)
    reason_stats = defaultdict(lambda: {"returns": [], "wins": 0, "total_pnl": 0.0})

    for t in trades:
        if t.side == "buy":
            open_lots[t.code].append([t, t.qty])
            continue

        lots = open_lots[t.code]
        remaining = t.qty
        while remaining > 0 and lots:
            lot = lots[0]
            buy, lot_qty = lot
            matched = min(remaining, lot_qty)
            pnl = (t.price - buy.price) * matched

            stats = reason_stats[buy.reason or "未标注"]
            stats["returns"].append((t.price / buy.price - 1) * 100)
            stats["total_pnl"] += pnl
            if pnl > 0:
                stats["wins"] += 1

            lot[1] = lot_qty - matched
            remaining -= matched
            if lot[1] <= 0:
                lots.popleft()

    if not reason_stats:
        return {"ok": False, "msg": "暂无已平仓交易"}

    results = []
    for reason, stats in reason_stats.items():
        returns = stats["returns"]
        win_ratio = stats["wins"] / len(returns)
        results.append(
            {
                "reason": reason,
                "count": len(returns),
                "win_rate": round(win_ratio * 100, 1),
                "avg_return": round(np.mean(returns), 2),
                "total_pnl": round(stats["total_pnl"], 2),
                # A reason counts as effective when it wins more than half the time.
                "effectiveness": "有效" if win_ratio > 0.5 else "无效",
            }
        )
    results.sort(key=lambda r: r["win_rate"], reverse=True)

    return {
        "ok": True,
        "by_reason": results,
        "best_reason": results[0]["reason"] if results else None,
        "worst_reason": results[-1]["reason"] if results else None,
    }
def analyze_emotion_correlation(trades: List[Trade]) -> Dict[str, Any]:
    """Relate the emotion tag recorded on each buy to the realised outcome.

    Sells are matched against earlier buys of the same code first-in,
    first-out; every matched slice is attributed to the emotion recorded on
    the buy (``"未标注"`` when the buy has none).

    The trade records are never modified: remaining lot sizes are tracked in
    local state, so calling this function does not change what later analyses
    see and does not alter the ``qty`` of the passed objects.

    Args:
        trades: Trade records; the matching is order-dependent, so they are
            expected oldest first. ``side``, ``code``, ``qty``, ``price`` and
            ``emotion`` are read. Any side other than ``"buy"`` is treated as
            a sell.

    Returns:
        ``{"ok": False, "msg": ...}`` when no sell could be matched; otherwise
        per-emotion statistics sorted by average return (highest first) plus
        an advice text.
    """
    # code -> FIFO queue of [buy trade, remaining quantity]
    open_lots = defaultdict(deque)
    emotion_stats = defaultdict(lambda: {"returns": [], "wins": 0, "total_pnl": 0.0})

    for t in trades:
        if t.side == "buy":
            open_lots[t.code].append([t, t.qty])
            continue

        lots = open_lots[t.code]
        remaining = t.qty
        while remaining > 0 and lots:
            lot = lots[0]
            buy, lot_qty = lot
            matched = min(remaining, lot_qty)
            pnl = (t.price - buy.price) * matched

            stats = emotion_stats[buy.emotion or "未标注"]
            stats["returns"].append((t.price / buy.price - 1) * 100)
            stats["total_pnl"] += pnl
            if pnl > 0:
                stats["wins"] += 1

            lot[1] = lot_qty - matched
            remaining -= matched
            if lot[1] <= 0:
                lots.popleft()

    if not emotion_stats:
        return {"ok": False, "msg": "暂无已平仓交易"}

    results = []
    for emotion, stats in emotion_stats.items():
        returns = stats["returns"]
        results.append(
            {
                "emotion": emotion,
                "count": len(returns),
                "win_rate": round(stats["wins"] / len(returns) * 100, 1),
                "avg_return": round(np.mean(returns), 2),
                "total_pnl": round(stats["total_pnl"], 2),
            }
        )
    results.sort(key=lambda r: r["avg_return"], reverse=True)

    return {
        "ok": True,
        "by_emotion": results,
        "advice": _generate_emotion_advice(results),
    }


def _generate_emotion_advice(results: List[Dict]) -> str:
    """Build the advice text from per-emotion results.

    Args:
        results: Per-emotion statistics, best first; the first entry is
            reported as the best state and the last entry as the worst.

    Returns:
        A multi-line advice string, or ``"数据不足"`` when ``results`` is empty.
    """
    if not results:
        return "数据不足"
    best = results[0]
    worst = results[-1]
    advice = (
        f"最佳情绪状态:{best['emotion']}(胜率{best['win_rate']}%"
        f"平均收益{best['avg_return']}%\n"
    )
    advice += (
        f"最差情绪状态:{worst['emotion']}(胜率{worst['win_rate']}%"
        f"平均收益{worst['avg_return']}%\n"
    )
    advice += "\n建议:保持理性和纪律,避免在贪婪或恐慌时做决策"
    return advice
def analyze_excess_return(
    trades: List[Trade], benchmark_code: str = "sh000300"
) -> Dict[str, Any]:
    """Compare the realised portfolio return with a benchmark index.

    The benchmark return is the change between the first and last index close
    found between the earliest and latest trade date. The portfolio return is
    realised PnL (average-cost basis, buy and sell fees included) divided by
    the total amount spent on buys.

    Args:
        trades: Trade records; ``date``, ``code``, ``side``, ``price``,
            ``qty`` and ``fee`` are read. Any side other than ``"buy"`` is
            treated as a sell; sells without an open position are ignored.
        benchmark_code: ``IndexDaily.code`` of the benchmark to compare
            against. Defaults to ``"sh000300"``, the value that was
            previously hard-coded.

    Returns:
        ``{"ok": False, "msg": ...}`` when there are no trades or no index
        data for the period; otherwise portfolio, index and excess return
        (all in percent) with a textual interpretation.
    """
    if not trades:
        return {"ok": False, "msg": "暂无交易记录"}

    trade_dates = [t.date for t in trades]
    start_date, end_date = min(trade_dates), max(trade_dates)

    # Only the index query needs the session; the rest is pure computation.
    with get_session() as s:
        index_data = s.execute(
            select(IndexDaily.date, IndexDaily.close)
            .where(
                and_(
                    IndexDaily.code == benchmark_code,
                    IndexDaily.date >= start_date,
                    IndexDaily.date <= end_date,
                )
            )
            .order_by(IndexDaily.date)
        ).all()
    if not index_data:
        return {"ok": False, "msg": "缺少指数数据"}

    index_start = float(index_data[0][1])
    index_end = float(index_data[-1][1])
    index_return = (index_end / index_start - 1) * 100

    # code -> open quantity and its remaining cost basis
    holdings = defaultdict(lambda: {"qty": 0, "cost": 0.0})
    realized_pnl = 0.0
    total_cost = 0.0
    for t in trades:
        fee = t.fee or 0.0  # a missing fee is treated as zero
        position = holdings[t.code]
        if t.side == "buy":
            outlay = t.price * t.qty + fee
            position["cost"] += outlay
            position["qty"] += t.qty
            total_cost += outlay
        elif position["qty"] > 0:
            avg_cost = position["cost"] / position["qty"]
            # Never close more than is currently held.
            qty = min(t.qty, position["qty"])
            realized_pnl += (t.price - avg_cost) * qty - fee
            position["cost"] -= avg_cost * qty
            position["qty"] -= qty

    portfolio_return = (realized_pnl / total_cost * 100) if total_cost > 0 else 0
    excess_return = portfolio_return - index_return

    # Any positive excess counts as outperformance; only a shortfall of more
    # than 5 percentage points is flagged as underperformance.
    if excess_return > 0:
        source = "选股能力贡献"
        interpretation = "组合表现优于大盘,说明选股和择时能力较好"
    elif excess_return < -5:
        source = "选股/择时失误"
        interpretation = "组合表现明显弱于大盘,建议反思选股逻辑和买卖时机"
    else:
        source = "与大盘持平"
        interpretation = "组合表现与大盘接近,可考虑增强选股策略"

    return {
        "ok": True,
        "portfolio_return": round(portfolio_return, 2),
        "index_return": round(index_return, 2),
        "excess_return": round(excess_return, 2),
        "source": source,
        "interpretation": interpretation,
        "period": f"{start_date} ~ {end_date}",
    }