# 核心数据模型

定义系统中所有实体的 Schema,确保引擎间数据交换的一致性。
所有模型基于 Pydantic v2,同时服务于数据校验、API 文档、序列化(Redis/JSON)、ORM 映射(SQLAlchemy)。

## Entity 关系图
mermaid
erDiagram
%% ─── 叙事层 ───
Narrative {
string id PK
string theme
string description
NarrativeStatus status
float source_credibility
datetime first_seen
datetime last_updated
datetime confirmed_at
vector embedding
}
NarrativeEvent {
string event_id PK
string narrative_id FK
NarrativeEventType event_type
datetime timestamp
json payload
}
NarrativeSnapshot {
string narrative_id FK
datetime snapshot_at
float strength_latest
float strength_ewma
float sentiment_latest
float sentiment_ewma
}
NarrativeEdge {
string edge_id PK
string source_narrative_id FK
string target_narrative_id FK
NarrativeRelationType relation_type
float strength
datetime last_validated
}
%% ─── Persona 层 ───
PersonaConfig {
string id PK
string name
string core_framework
string analysis_framework
string scoring_rubric
}
PersonaVersion {
string version_id PK
string persona_id FK
int version_number
PersonaChangeReason change_reason
datetime active_from
datetime active_until
}
PersonaPerformance {
string persona_id FK
float ensemble_weight
int total_judgments
int correct_judgments
float accuracy_rate
float recent_accuracy_ewma
}
%% ─── 数据层 ───
IndicatorSnapshot {
string snapshot_id PK
datetime snapshot_at
}
IndicatorValue {
string name
IndicatorCategory category
float value
float z_score
float percentile
TrendDirection trend
bool is_anomaly
}
SentimentSnapshot {
string snapshot_id PK
datetime snapshot_at
}
RetailSentiment {
float fear_greed_index
float put_call_ratio
float social_sentiment
}
MediaSentiment {
float news_sentiment
int news_volume
float central_bank_tone
}
InstitutionalSentiment {
float fund_cash_level
float etf_flow_direction
float cftc_net_position
}
%% ─── 判断层 ───
Judgment {
string judgment_id PK
datetime created_at
string narrative_snapshot_id FK
string indicator_snapshot_id FK
string sentiment_snapshot_id FK
}
PersonaJudgment {
string persona_id FK
int persona_version
MacroRegime regime
float regime_confidence
float narrative_belief
string reasoning_chain
}
DebateRound {
int round_number
}
JudgmentSynthesis {
MacroRegime regime
float regime_confidence
NarrativeDataAlignment narrative_data_alignment
float persona_consensus_level
}
JudgmentValidation {
datetime validated_at
bool regime_was_correct
string lessons_learned
}
%% ─── 交易层 ───
MismatchScore {
string mismatch_id PK
float mismatch_raw
float mismatch_sustained
MismatchQuadrant quadrant
float signal_strength
}
TradeProposal {
string proposal_id PK
string direction
float conviction_level
TradeStatus status
}
%% ─── 关系 ───
Narrative ||--o{ NarrativeEvent : "产生事件"
Narrative ||--o| NarrativeSnapshot : "最新快照"
Narrative }o--o{ NarrativeEdge : "关系图"
Narrative }o--o{ Narrative : "支撑/矛盾/包含/演化"
PersonaConfig ||--o{ PersonaVersion : "版本化"
PersonaConfig ||--o| PersonaPerformance : "表现记录"
PersonaVersion ||--o| PersonaPerformance : "该版本表现"
IndicatorSnapshot ||--o{ IndicatorValue : "包含指标"
SentimentSnapshot ||--o| RetailSentiment : "散户"
SentimentSnapshot ||--o| MediaSentiment : "媒体"
SentimentSnapshot ||--o| InstitutionalSentiment : "机构"
Judgment ||--o{ PersonaJudgment : "各Persona判断"
Judgment ||--o{ DebateRound : "辩论记录"
Judgment ||--o| JudgmentSynthesis : "合成结果"
Judgment ||--o| JudgmentValidation : "事后验证"
Judgment }o--|| NarrativeSnapshot : "输入"
Judgment }o--|| IndicatorSnapshot : "输入"
Judgment }o--|| SentimentSnapshot : "输入"
PersonaJudgment }o--|| PersonaConfig : "由谁判断"
MismatchScore ||--o| Judgment : "基于判断"
TradeProposal ||--o| MismatchScore : "基于错配"

## 枚举定义(enums.py)
所有业务枚举集中管理,避免散落在各模型中。
python
# --- 叙事相关 ---
class NarrativeStatus(str, Enum):
    """Lifecycle state of a narrative.

    Documented flow: candidate -> active -> fading -> archived, plus
    candidate -> rejected when a candidate does not pass validation.
    """

    candidate = "candidate"  # candidate, awaiting validation
    active = "active"  # actively tracked
    fading = "fading"  # in decline
    archived = "archived"  # archived
    rejected = "rejected"  # candidate that did not pass validation
class NarrativeEventType(str, Enum):
    """Event kinds carried by ``NarrativeEvent.event_type``."""

    # lifecycle / measurements
    created = "created"
    strength_updated = "strength_updated"
    sentiment_updated = "sentiment_updated"
    status_changed = "status_changed"

    # sources
    source_added = "source_added"
    source_removed = "source_removed"

    # structural changes
    merged_into = "merged_into"
    split_from = "split_from"

    # metadata
    tag_added = "tag_added"
    description_updated = "description_updated"
class NarrativeRelationType(str, Enum):
    """Relation kinds carried by ``NarrativeEdge.relation_type``."""

    supports = "supports"
    contradicts = "contradicts"
    evolves_to = "evolves_to"
    contains = "contains"
    intersects = "intersects"
    merges_into = "merges_into"
    splits_from = "splits_from"
# --- 趋势 ---
class TrendDirection(str, Enum):
    """Direction of an indicator's trend."""

    rising = "rising"
    stable = "stable"
    falling = "falling"
    accelerating = "accelerating"  # rising at an increasing pace
    decelerating = "decelerating"  # rising at a slowing pace
# --- 市场 Regime ---
class MacroRegime(str, Enum):
    """Macro market regime labels used in judgments."""

    expansion = "expansion"  # expansion
    late_cycle = "late_cycle"  # end of cycle
    stagflation = "stagflation"  # stagflation
    recession = "recession"  # recession
    recovery = "recovery"  # recovery
    deflation = "deflation"  # deflation
    uncertainty = "uncertainty"  # uncertain (used when confidence is low)
# --- 资产类别 ---
class AssetClass(str, Enum):
    """Asset class labels."""

    equity = "equity"
    fixed_income = "fixed_income"
    fx = "fx"
    commodity = "commodity"
    crypto = "crypto"
    alternative = "alternative"
# --- 指标分组 ---
class IndicatorCategory(str, Enum):
    """Grouping of market indicators."""

    rates = "rates"  # interest rates
    credit = "credit"  # credit
    equity = "equity"  # equities
    fx = "fx"  # foreign exchange
    commodity = "commodity"  # commodities
    crypto = "crypto"  # crypto
    leverage = "leverage"  # leverage
    sentiment = "sentiment"  # sentiment
# --- 数据来源 ---
class DataSource(str, Enum):
    """Origin of an indicator value."""

    openbb = "openbb"
    ibkr = "ibkr"
    longbridge = "longbridge"
    binance = "binance"
    fred = "fred"
    manual = "manual"
class SourceType(str, Enum):
    """Kind of source backing a narrative (``SourceRef.source_type``)."""

    news = "news"
    central_bank = "central_bank"
    data_release = "data_release"
    social_media = "social_media"
    earnings_report = "earnings_report"
    research = "research"
    manual = "manual"  # injected by a human
# --- 错配分类 ---
class MismatchQuadrant(str, Enum):
    """Quadrant of think-tank view versus market sentiment."""

    alpha_long = "alpha_long"  # think tank bullish + sentiment bearish
    alpha_short = "alpha_short"  # think tank bearish + sentiment bullish
    beta_long = "beta_long"  # think tank bullish + sentiment bullish
    beta_short = "beta_short"  # think tank bearish + sentiment bearish
# --- 叙事-数据对齐 ---
class NarrativeDataAlignment(str, Enum):
    """How the narrative relates to the data."""

    aligned = "aligned"  # narrative and data agree
    diverging = "diverging"  # narrative and data disagree
    data_leading = "data_leading"  # data leads the narrative
    narrative_leading = "narrative_leading"  # narrative leads the data
# --- Persona 变更原因 ---
class PersonaChangeReason(str, Enum):
    """Why a new persona version was created (``PersonaVersion.change_reason``)."""

    manual_update = "manual_update"
    meta_learning = "meta_learning"
    regime_adaptation = "regime_adaptation"
    performance_review = "performance_review"
# --- 交易状态 ---
class TradeStatus(str, Enum):
    """Status of a trade proposal (``TradeProposal.status``)."""

    proposed = "proposed"
    approved = "approved"
    executed = "executed"
    closed = "closed"
    cancelled = "cancelled"

## 实体关系总览
Narrative ──1:N──→ NarrativeEvent
Narrative ──1:1──→ NarrativeSnapshot (最新快照)
Narrative ──N:N──→ Narrative (通过 NarrativeEdge)
Narrative ──1:N──→ SourceRef
PersonaConfig ──1:1──→ PersonaPerformance
PersonaConfig ──1:N──→ PersonaVersion
IndicatorSnapshot ──1:N──→ IndicatorValue
SentimentSnapshot ──1:1──→ RetailSentiment / MediaSentiment / InstitutionalSentiment(各一份)
Judgment ──1:N──→ PersonaJudgment ──N:1──→ PersonaConfig
Judgment ──1:N──→ DebateRound
Judgment ──1:1──→ JudgmentSynthesis
Judgment ──1:1──→ JudgmentValidation (延迟填充)
Judgment ──1:1──→ NarrativeSnapshot
Judgment ──1:1──→ IndicatorSnapshot
Judgment ──1:1──→ SentimentSnapshot
MismatchScore ──1:1──→ Judgment
TradeProposal ──1:1──→ MismatchScore

## 1. 叙事模型 (Narrative)
系统的核心对象。叙事不是静态数据,是随时间演变的事件流。
### 1.1 叙事主表
python
class Narrative(BaseModel):
    """A tracked narrative: identity, current state, sources and timing."""

    id: str  # unique identifier
    theme: str  # narrative theme, e.g. "Fed pivot expectation"
    description: str  # narrative description (LLM-generated)

    # --- state ---
    status: NarrativeStatus  # candidate / active / fading / archived
    strength: StrengthRecord  # strength (latest value + EWMA)
    sentiment: SentimentRecord  # sentiment (latest value + EWMA)

    # --- sources ---
    sources: list[SourceRef]  # news/data references backing this narrative
    source_credibility: float  # [0, 1] source-weighted credibility

    # --- timing ---
    first_seen: datetime  # first discovered
    last_updated: datetime  # most recent update
    confirmed_at: datetime | None  # when it moved from candidate to active

    # Semantic vector for matching similar narratives (stored in pgvector).
    embedding: list[float] | None

    # --- metadata ---
    tags: list[str]  # classification tags, e.g. ["fed", "rates", "dovish"]
    created_at: datetime
class StrengthRecord(BaseModel):
    """Dual tracking of strength: latest value plus EWMA."""

    latest: float  # [0, 1] latest raw value
    ewma: float  # [0, 1] exponentially weighted moving average
    ewma_alpha: float  # smoothing parameter (can be self-tuned)
    updated_at: datetime
class SentimentRecord(BaseModel):
    """Dual tracking of sentiment: latest value plus EWMA."""

    latest: float  # [-1, 1] latest raw value
    ewma: float  # [-1, 1] exponentially weighted moving average
    ewma_alpha: float  # smoothing parameter (can be self-tuned)
    updated_at: datetime

#### 双信号的价值:
latest 偏离 ewma 很大 → 市场出现极端情绪/转折信号
latest 贴近 ewma → 趋势稳定,没有新信息
ewma 持续上升/latest 开始下降 → 趋势可能见顶

#### EWMA 更新逻辑:
python
def update_ewma(current_ewma: float, new_value: float, alpha: float) -> float:
    """Return the EWMA after folding in ``new_value``.

    ``alpha`` is the weight given to the new observation; the previous
    average keeps the remaining ``1 - alpha``.
    """
    fresh_part = alpha * new_value
    carried_part = (1 - alpha) * current_ewma
    return fresh_part + carried_part

#### 状态机:
candidate → active → fading → archived
    ↓
  rejected(候选未通过验证)

### 1.2 来源引用
python
class SourceRef(BaseModel):
    """Reference to one source (news item, data release, ...) backing a narrative."""

    source_type: SourceType  # news / central_bank / data_release / social_media / manual / ...
    outlet: str  # "WSJ" | "Reuters" | central bank website
    author: str | None  # e.g. "Nick Timiraos" (high-weight journalist)
    credibility_weight: float  # [0, 1] source credibility
    url: str | None
    published_at: datetime
    snippet: str  # excerpt of the original text

    # Extra information for manual injection.
    injected_by: str | None  # who injected it (when source_type=manual)
    injection_reason: str | None  # why it was injected

### 1.3 事件溯源 (Event Sourcing)
叙事不存"当前状态",而是存"所有事件",当前状态从事件流中重建(或从快照恢复)。
python
class NarrativeEvent(BaseModel):
    """Narrative event: an immutable, atomic record."""

    event_id: str
    narrative_id: str
    event_type: NarrativeEventType
    timestamp: datetime
    payload: dict  # shape varies with event_type

### 1.4 快照
定期生成,避免每次查询都重放事件流。
python
class NarrativeSnapshot(BaseModel):
    """Full snapshot of a narrative's state at ``snapshot_at``."""

    narrative_id: str
    snapshot_at: datetime

    # Complete copy of the current state.
    status: NarrativeStatus
    strength_latest: float
    strength_ewma: float
    sentiment_latest: float
    sentiment_ewma: float
    active_sources: list[SourceRef]
    tags: list[str]

### 1.5 叙事关系图
python
class NarrativeEdge(BaseModel):
    """Relation between two narratives; the edges form a dynamic graph."""

    edge_id: str
    source_narrative_id: str
    target_narrative_id: str
    relation_type: NarrativeRelationType
    strength: float  # [0, 1] relation strength
    created_at: datetime
    last_validated: datetime  # last time the relation was confirmed to still hold
    evidence: list[str]  # evidence supporting the relation

#### 关系图的动态性示例:
T1: "Fed 降息预期" ─supports→ "美股反弹"
T2: "通胀粘性" 出现,削弱支撑关系
"Fed 降息预期" ─supports(0.3)→ "美股反弹" (strength 从 0.8 降至 0.3)
T3: 两个叙事合并
"Fed 降息预期" + "通胀粘性" ─merges_into→ "滞胀困境"

### 1.6 存储结构
narratives # 叙事主表(id, theme, created_at, embedding)
narrative_events # 事件表(只追加,不可变)
narrative_snapshots # 快照表(定期生成,用于快速查询)
narrative_edges # 关系表

## 2. Persona 模型
Persona 分为配置(来自 skill.md,不常变)和表现(持续更新)两层。
### 2.1 配置(Markdown 为源,数据库为缓存)
source of truth: .agents/skills/persona-soros/skill.md
.agents/skills/persona-buffett/skill.md
...
derived cache: persona_configs 表(DB 中,快速读取)
python
class PersonaConfig(BaseModel):
    """Persona configuration, synced from skill.md; changes need human approval."""

    id: str  # "soros", "buffett", "dalio" ...
    name: str
    description: str

    # --- core framework ---
    core_framework: str  # e.g. reflexivity theory / value investing / ...
    analysis_framework: str  # detailed prompt of the analysis framework (used by the LLM)
    scoring_rubric: str  # scoring criteria

    # --- analysis preferences ---
    focus_areas: list[str]  # what to pay attention to
    ignore_areas: list[str]  # what to ignore
    bias_tendency: str  # "extreme_judgments" / "conservative" / ...

#### 同步机制:
python
class PersonaSync:
    """Markdown <-> DB synchronisation."""

    # Glob pattern handed to ``glob`` in ``sync_all``.
    source_dir: str = ".agents/skills/persona-*/skill.md"

    async def sync_all(self):
        """Rebuild the DB cache from the md files (at startup or on manual trigger)."""
        for skill_path in glob(self.source_dir):
            await upsert_persona_config(parse_skill_md(skill_path))

### 2.2 版本化
Persona 随时间演进,每个版本不可变。
python
class PersonaVersion(BaseModel):
    """One version of a persona; immutable."""

    version_id: str
    persona_id: str
    version_number: int  # 1, 2, 3 ...

    # Configuration snapshot for this version.
    config_snapshot: PersonaConfig

    # --- version metadata ---
    created_at: datetime
    change_reason: PersonaChangeReason
    change_description: str  # what changed and why
    release_notes: str | None  # content of release.md in the persona repo
    git_commit: str | None  # git commit hash for this version

    # Performance of this version while it was in effect.
    performance_in_effect: PersonaPerformance | None
    active_from: datetime
    active_until: datetime | None  # None = currently active version

#### 演化路径:
版本 1: 初始 Persona(从 skill.md 加载)
↓ 表现分析:在 "stagflation" regime 下准确率低
版本 2: 人工调整 scoring_rubric(加入通胀敏感度权重)
↓ 元学习发现:对抗辩论中经常被 Buffett 说服但实际 Buffett 错了
版本 3: 调整 debate 策略(增加自我辩护强度)

### 2.3 表现记录
python
class PersonaPerformance(BaseModel):
    """Persona performance record; updated automatically."""

    persona_id: str

    # --- weight ---
    ensemble_weight: float  # [0, 1] current ensemble voting weight
    weight_history: list[WeightSnapshot]

    # --- statistics ---
    total_judgments: int
    correct_judgments: int
    accuracy_rate: float
    accuracy_by_regime: dict[str, float]  # accuracy per regime

    # --- strengths / weaknesses ---
    strong_categories: list[str]
    weak_categories: list[str]

    # Recent performance (decay-weighted; recent results count more).
    recent_accuracy_ewma: float
class WeightSnapshot(BaseModel):
    """One entry of ``PersonaPerformance.weight_history``."""

    timestamp: datetime
    weight: float
    reason: str  # why the weight changed

#### 权重更新(贝叶斯方法,保留探索空间):
python
def update_weight(
    persona: PersonaPerformance,
    judgment_result: bool,
    *,
    learning_rate: float = 0.05,
    exploration_floor: float = 0.1,
) -> None:
    """Move a persona's ensemble weight toward its posterior accuracy.

    The target is the mean of a Beta(correct + 1, incorrect + 1) posterior
    built from the persona's judgment counters. The stored weight moves
    ``learning_rate`` of the way toward that target on each call, i.e. the
    rate weights the *new* value (same convention as ``update_ewma``), and
    the result never drops below ``exploration_floor``.

    Args:
        persona: Record whose ``ensemble_weight`` is updated in place.
        judgment_result: Outcome of the latest judgment.
            NOTE(review): not folded into the counters here; presumably the
            caller updates ``total_judgments`` / ``correct_judgments``
            before calling — confirm.
        learning_rate: Fraction of the gap to the target closed per call.
        exploration_floor: Minimum weight, so a persona is never zeroed out.
    """
    # Beta posterior with a uniform Beta(1, 1) prior.
    alpha = persona.correct_judgments + 1
    beta = persona.total_judgments - persona.correct_judgments + 1
    expected_accuracy = alpha / (alpha + beta)

    # Keep (1 - learning_rate) of the old weight, take learning_rate of the target.
    blended = (
        persona.ensemble_weight * (1 - learning_rate)
        + expected_accuracy * learning_rate
    )
    persona.ensemble_weight = max(exploration_floor, blended)

### 2.4 元学习优化建议
python
class PersonaEvolutionProposal(BaseModel):
    """Persona optimisation proposal generated by the meta-learning agent."""

    proposal_id: str
    persona_id: str
    current_version: int

    # --- basis of the analysis ---
    analysis_period: tuple[datetime, datetime]
    identified_weakness: str
    evidence: list[str]

    # --- proposed modification ---
    proposed_changes: dict
    expected_improvement: str

    # Requires human approval.
    status: str  # "proposed" / "approved" / "rejected"

## 3. 指标模型 (Indicator)
python
class IndicatorSnapshot(BaseModel):
    """Indicator snapshot: all market indicators at one point in time."""

    snapshot_id: str
    snapshot_at: datetime
    indicators: dict[str, IndicatorValue]
class IndicatorValue(BaseModel):
    """Complete state of a single indicator."""

    name: str  # e.g. "yield_curve_2s10s"
    category: IndicatorCategory  # rates / credit / equity / ...
    value: float
    unit: str  # "percent", "bps", "index"
    source: DataSource  # ibkr / openbb / binance / ...

    # --- statistical context ---
    z_score: float | None
    percentile: float | None
    history_30d: list[float] | None

    # --- trend ---
    trend: TrendDirection
    change_1d: float
    change_1w: float

    # --- anomaly flag ---
    is_anomaly: bool  # z_score > 2 or < -2
    anomaly_note: str | None
class IndicatorGroups(BaseModel):
    """Indicators grouped by category, for easy reference by personas."""

    # NOTE(review): IndicatorCategory also defines `sentiment`, which has no
    # group here — confirm whether that omission is intentional.
    rates: dict[str, IndicatorValue]
    credit: dict[str, IndicatorValue]
    equity: dict[str, IndicatorValue]
    fx: dict[str, IndicatorValue]
    commodity: dict[str, IndicatorValue]
    crypto: dict[str, IndicatorValue]
    leverage: dict[str, IndicatorValue]

## 4. 情绪模型 (Sentiment)
python
class SentimentSnapshot(BaseModel):
    """Sentiment snapshot: the market-sentiment picture at one point in time."""

    snapshot_id: str
    snapshot_at: datetime
    retail: RetailSentiment  # retail sentiment
    media: MediaSentiment  # media sentiment
    institutional: InstitutionalSentiment  # institutional behaviour
class RetailSentiment(BaseModel):
    """Retail part of a ``SentimentSnapshot``."""

    fear_greed_index: float  # [0, 100]
    put_call_ratio: float
    margin_balance_change: float
    social_sentiment: float  # [-1, 1]
    search_trend: float
class MediaSentiment(BaseModel):
    """Media part of a ``SentimentSnapshot``."""

    news_sentiment: float  # [-1, 1]
    news_volume: int  # number of news items (narrative heat)
    central_bank_tone: float  # [-1, 1] dovish/hawkish scale
    kol_consensus: float  # [-1, 1] consensus of KOL opinions
class InstitutionalSentiment(BaseModel):
    """Institutional part of a ``SentimentSnapshot``."""

    fund_cash_level: float
    etf_flow_direction: float  # [-1, 1]
    cftc_net_position: float
    ipo_activity: float

## 5. 判断模型 (Judgment)
系统最核心的输出——每次 ensemble 分析的完整记录。
python
class Judgment(BaseModel):
    """One complete ensemble judgment."""

    judgment_id: str
    created_at: datetime

    # --- input snapshots ---
    narrative_snapshot_id: str
    indicator_snapshot_id: str
    sentiment_snapshot_id: str

    # Independent judgment from each persona.
    persona_judgments: list[PersonaJudgment]

    # Record of the adversarial debate.
    debate_rounds: list[DebateRound]

    # Result of the ensemble synthesis.
    synthesis: JudgmentSynthesis

    # After-the-fact validation (filled in later).
    validation: JudgmentValidation | None
class PersonaJudgment(BaseModel):
    """Independent judgment of a single persona."""

    persona_id: str
    persona_version: int  # persona version number that was used
    timestamp: datetime

    # --- regime call ---
    regime: MacroRegime
    regime_confidence: float  # [0, 1]

    # --- narrative assessment ---
    narrative_assessment: str  # view on the current narrative (natural language)
    narrative_belief: float  # [-1, 1] whether the current narrative is believed

    # --- scenario analysis ---
    scenarios: list[Scenario]

    # Reasoning chain (auditable).
    reasoning_chain: str
class Scenario(BaseModel):
    """A projected scenario."""

    description: str
    probability: float  # [0, 1]
    triggers: list[str]
    asset_implications: dict[str, str]
class DebateRound(BaseModel):
    """One round of debate."""

    round_number: int
    challenges: list[DebateChallenge]
class DebateChallenge(BaseModel):
    """One challenge/response exchange inside a ``DebateRound``."""

    challenger_id: str
    target_id: str
    challenge: str
    response: str
class JudgmentSynthesis(BaseModel):
    """Result of the ensemble synthesis."""

    # --- final regime call ---
    regime: MacroRegime
    regime_confidence: float

    # --- narrative/data alignment ---
    narrative_data_alignment: NarrativeDataAlignment
    key_tension: str

    # --- scenarios ---
    scenarios: list[Scenario]

    # --- consensus and disagreement ---
    persona_consensus_level: float  # [0, 1] degree of consensus
    key_disagreements: list[str]

    # Conditions that would falsify the judgment.
    invalidation_conditions: list[str]
class JudgmentValidation(BaseModel):
    """After-the-fact validation; filled in later."""

    validated_at: datetime
    actual_outcome: str
    regime_was_correct: bool
    scenario_accuracy: dict[str, bool]
    persona_accuracy: dict[str, bool]
    lessons_learned: str

## 6. 错配模型 (Mismatch)
python
class MismatchScore(BaseModel):
    """Mismatch score: think tank versus sentiment."""

    mismatch_id: str
    created_at: datetime
    judgment_id: str

    # --- think-tank consensus ---
    persona_consensus_direction: float  # [-1, 1]
    persona_consensus_confidence: float  # [0, 1]

    # --- market sentiment ---
    sentiment_direction: float  # [-1, 1]
    sentiment_intensity: float  # [0, 1]

    # --- mismatch computation ---
    mismatch_raw: float  # persona_consensus_direction - sentiment_direction
    mismatch_duration: timedelta
    mismatch_sustained: float  # raw x duration

    # --- classification ---
    quadrant: MismatchQuadrant
    signal_strength: float  # [0, 1]

## 7. 交易模型 (TradeProposal)——预留
错配之后的交易决策层,当前预留接口,后续细化。
python
class TradeProposal(BaseModel):
    """Trade proposal generated from a mismatch signal."""

    proposal_id: str
    mismatch_id: str
    created_at: datetime

    # --- direction (settled) ---
    direction: str  # "long" / "short" / "neutral"
    target_assets: list[str]

    # --- investment thesis (settled) ---
    thesis: str
    conviction_level: float  # [0, 1]

    # --- sizing and risk control (to be refined) ---
    position_sizing: PositionSizing | None
    risk_reward: RiskReward | None
    entry_rules: list[str] | None
    exit_rules: list[str] | None

    # --- status ---
    status: TradeStatus
class PositionSizing(BaseModel):
    """Position sizing; placeholder, to be refined."""

    method: str  # "kelly" / "fixed_fraction" / "volatility_target"
    params: dict
    suggested_size: float | None
class RiskReward(BaseModel):
    """Win rate and payoff ratio; placeholder, to be refined."""

    estimated_win_rate: float | None
    estimated_payoff_ratio: float | None
    kelly_fraction: float | None
    max_drawdown_tolerance: float | None

## 已讨论决定
- [x] 叙事模型:事件溯源 + 快照 + 关系图 + 双追踪(latest + EWMA)
- [x] 来源模型:可扩展枚举,支持人工注入
- [x] Persona 模型:Markdown 为源、DB 为缓存、版本化演进
- [x] 指标模型:分组 + 统计上下文 + 异常标记
- [x] 情绪模型:散户 / 媒体 / 机构三层
- [x] 判断模型:Persona 独立判断 + 辩论记录 + 合成结果 + 事后验证
- [x] 错配模型:四象限分类 + 持续修正
- [x] 交易模型:预留接口,后续细化凯利公式和仓位管理
- [x] 枚举定义:统一管理,支持扩展
## 待后续细化
- [ ] PositionSizing 凯利公式具体实现
- [ ] RiskReward 胜率/盈亏比计算逻辑
- [ ] 具体的 SQL DDL / ORM 映射
- [ ] 索引策略和查询优化