You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
nmWTAI-Platform/ML/nmWTAI-ML/src/models/forward_surrogate.py

214 lines
8.1 KiB
Python

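# -*- coding: utf-8 -*-
"""Network architecture of the forward surrogate model.

ForwardSurrogate takes standardized physical-parameter features and an
optional flow-schedule encoding as input, and outputs a fixed-length
concatenated curve (log_pressure, log_derivative, slope). The model is built
from a parameter branch + a flow-schedule branch + a fusion trunk + multiple
output heads, which makes it easier to learn static formation information,
dynamic schedule information, and the curve shape jointly determined by both.

The pressure and derivative outputs are each split into two parts, level and
shape: level learns the vertical offset of the whole curve, while shape learns
the local, mean-removed form. This structurally reduces the interference
between overall magnitude and local shape.
"""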
from __future__ import annotations
import torch
import torch.nn as nn
def build_mlp(
    in_dim: int,
    hidden_dims: list[int],
    out_dim: int,
    dropout: float = 0.0,
) -> nn.Sequential:
    """Assemble a multilayer perceptron from a list of hidden widths.

    Every hidden width contributes a ``Linear -> ReLU`` pair, followed by a
    ``Dropout`` layer only when ``dropout`` is greater than zero. The stack is
    closed by a plain ``Linear`` projection to ``out_dim`` with no activation.

    Args:
        in_dim: Width of the input features.
        hidden_dims: Widths of the hidden layers, in order. May be empty, in
            which case the result is a single ``Linear(in_dim, out_dim)``.
        out_dim: Width of the final linear projection.
        dropout: Dropout probability applied after each hidden activation;
            values ``<= 0`` disable dropout entirely.

    Returns:
        An ``nn.Sequential`` containing the layers described above.
    """
    # Width sequence seen by the hidden Linear layers: in_dim, h1, h2, ...
    widths = [in_dim, *hidden_dims]
    with_dropout = dropout > 0

    stack: list[nn.Module] = []
    for fan_in, fan_out in zip(widths, widths[1:]):
        stack.append(nn.Linear(fan_in, fan_out))
        stack.append(nn.ReLU())
        if with_dropout:
            stack.append(nn.Dropout(dropout))

    # The output projection starts from the last hidden width (or in_dim when
    # there are no hidden layers).
    stack.append(nn.Linear(widths[-1], out_dim))
    return nn.Sequential(*stack)
class ScheduleEncoder(nn.Module):
    """Flow-schedule branch: encodes a fixed-length schedule vector into hidden features."""

    def __init__(self, schedule_dim: int, hidden_dim: int, dropout: float = 0.0):
        """Build a two-layer encoder sized by the schedule vector dimension.

        Args:
            schedule_dim: Width of the incoming schedule vector.
            hidden_dim: Width of both linear layers' outputs.
            dropout: Dropout probability between the two layers; values
                ``<= 0`` insert an ``nn.Identity`` instead.
        """
        super().__init__()
        # An Identity placeholder is used when dropout is off so that the
        # layer indices inside ``net`` are the same in both configurations.
        if dropout > 0:
            regularizer: nn.Module = nn.Dropout(dropout)
        else:
            regularizer = nn.Identity()

        stages = [
            nn.Linear(schedule_dim, hidden_dim),
            nn.ReLU(),
            regularizer,
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map the schedule features to a hidden representation of width ``hidden_dim``."""
        # This branch handles only the schedule vector, so its output can later
        # be concatenated with the formation-parameter features.
        encoded = self.net(x)
        return encoded
class ParamEncoder(nn.Module):
    """Parameter branch: encodes transformed physical parameters into hidden features."""

    def __init__(self, param_dim: int, hidden_dim: int, dropout: float = 0.0):
        """Build a two-layer encoder sized by the parameter feature dimension.

        Args:
            param_dim: Width of the incoming parameter feature vector.
            hidden_dim: Width of both linear layers' outputs.
            dropout: Dropout probability between the two layers; values
                ``<= 0`` insert an ``nn.Identity`` instead.
        """
        super().__init__()
        # An Identity placeholder is used when dropout is off so that the
        # layer indices inside ``net`` are the same in both configurations.
        if dropout > 0:
            regularizer: nn.Module = nn.Dropout(dropout)
        else:
            regularizer = nn.Identity()

        stages = [
            nn.Linear(param_dim, hidden_dim),
            nn.ReLU(),
            regularizer,
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map the transformed formation and wellbore parameters to a hidden representation."""
        # Per the original author's note, parameter features usually come from
        # scale transforms such as log/asinh; they are encoded here before being
        # fused with the schedule branch.
        encoded = self.net(x)
        return encoded
class ForwardSurrogate(nn.Module):
    """Forward surrogate model that predicts a full concatenated curve.

    A parameter branch and an optional flow-schedule branch are encoded
    separately, concatenated in hidden space, passed through a fusion trunk and
    decoded by five output heads (pressure level/shape, derivative level/shape,
    slope).

    Inputs:
        params_x: Standardized physical-parameter features whose last
            dimension is ``param_dim`` (typically ``[B, param_dim]``).
        schedule_x: Standardized flow-schedule vector whose last dimension is
            ``schedule_dim`` (typically ``[B, schedule_dim]``). May be omitted
            when ``use_schedule=False``.

    Output:
        curve_pred: Tensor whose last dimension is ``curve_dim`` (typically
            ``[B, curve_dim]``), laid out as the three segments log_pressure,
            log_derivative and slope concatenated in that order. ``curve_dim``
            must be a positive multiple of 3 so that every segment has the
            same number of time points.
    """

    # Hidden width shared by all five output heads.
    _HEAD_HIDDEN_DIM = 128

    def __init__(
        self,
        param_dim: int,
        schedule_dim: int,
        curve_dim: int,
        hidden_dim: int = 128,
        fusion_hidden_dims: list[int] | None = None,
        dropout: float = 0.0,
        use_schedule: bool = True,
    ):
        """Build the parameter branch, optional schedule branch, trunk and heads.

        Args:
            param_dim: Width of the parameter feature vector.
            schedule_dim: Width of the flow-schedule vector (unused when
                ``use_schedule`` is false).
            curve_dim: Total output width; must be a positive multiple of 3.
            hidden_dim: Output width of each encoder branch.
            fusion_hidden_dims: Hidden widths of the fusion trunk; defaults to
                ``[256, 256]``. The last entry is also the trunk output width.
            dropout: Dropout probability forwarded to every sub-network.
            use_schedule: Whether to build and use the flow-schedule branch.

        Raises:
            ValueError: If ``curve_dim`` is not a positive multiple of 3, or if
                ``fusion_hidden_dims`` is given but empty.
        """
        super().__init__()
        if curve_dim % 3 != 0:
            raise ValueError(f"curve_dim={curve_dim} 不能被 3 整除;期望为 pressure/derivative/slope 三段")
        # 0 and negative multiples of 3 pass the modulo test but would create
        # zero-width heads, so reject them explicitly.
        if curve_dim <= 0:
            raise ValueError(f"curve_dim={curve_dim} must be a positive multiple of 3")

        if fusion_hidden_dims is None:
            fusion_hidden_dims = [256, 256]
        else:
            fusion_hidden_dims = list(fusion_hidden_dims)
        # The trunk output width is taken from the last entry, so at least one
        # width is required.
        if not fusion_hidden_dims:
            raise ValueError("fusion_hidden_dims must contain at least one layer width")

        self.curve_dim = curve_dim
        self.part_dim = curve_dim // 3
        self.use_schedule = bool(use_schedule)

        # Parameters and flow schedule differ a lot in physical meaning and
        # scale, so each gets its own encoder branch.
        self.param_encoder = ParamEncoder(param_dim, hidden_dim, dropout=dropout)
        if self.use_schedule:
            self.schedule_encoder = ScheduleEncoder(schedule_dim, hidden_dim, dropout=dropout)
            trunk_in_dim = hidden_dim * 2
        else:
            self.schedule_encoder = None
            trunk_in_dim = hidden_dim

        trunk_out_dim = fusion_hidden_dims[-1]
        self.trunk = build_mlp(
            in_dim=trunk_in_dim,
            hidden_dims=fusion_hidden_dims,
            out_dim=trunk_out_dim,
            dropout=dropout,
        )

        def make_head(out_dim: int) -> nn.Sequential:
            """Create one output head on top of the trunk features."""
            return build_mlp(
                in_dim=trunk_out_dim,
                hidden_dims=[self._HEAD_HIDDEN_DIM],
                out_dim=out_dim,
                dropout=dropout,
            )

        # NOTE: heads are created in a fixed order so that parameter
        # registration (state-dict order) and seeded initialisation stay stable.

        # Pressure curve = level + centered shape: level learns the overall
        # vertical offset, shape learns the local curve form.
        self.pressure_level_head = make_head(1)
        self.pressure_shape_head = make_head(self.part_dim)

        # The derivative curve is split the same way, because plateaus, troughs
        # and transition segments matter a lot for automatic match screening.
        self.derivative_level_head = make_head(1)
        self.derivative_shape_head = make_head(self.part_dim)

        # slope is an auxiliary output, mainly kept for data-layout compatibility.
        self.slope_head = make_head(self.part_dim)

    @staticmethod
    def center_shape(x: torch.Tensor) -> torch.Tensor:
        """Remove the per-sample mean of a shape output along its last axis.

        Centering along the last (time-point) axis lets the level head alone
        carry the overall offset. Using ``dim=-1`` gives the same result as
        ``dim=1`` for ``[B, T]`` input and also supports unbatched ``[T]``
        input or extra leading batch dimensions.
        """
        return x - x.mean(dim=-1, keepdim=True)

    def forward(self, params_x: torch.Tensor, schedule_x: torch.Tensor | None = None) -> torch.Tensor:
        """Run one forward prediction.

        The parameter and flow-schedule branches are encoded separately and
        concatenated in hidden space; the fusion trunk extracts joint
        features; pressure and derivative are each produced as
        ``level + centered shape``; slope is predicted directly by its own
        head. The three segments are concatenated along the last axis in the
        order pressure, derivative, slope.

        Args:
            params_x: Parameter features, last dimension ``param_dim``.
            schedule_x: Flow-schedule vector, last dimension ``schedule_dim``.
                Required when ``use_schedule`` is true; ignored otherwise.

        Returns:
            Tensor with the same leading dimensions as the input and a last
            dimension of ``curve_dim``.

        Raises:
            ValueError: If ``use_schedule`` is true and ``schedule_x`` is None.
        """
        p = self.param_encoder(params_x)
        if self.use_schedule:
            if schedule_x is None:
                raise ValueError("use_schedule=True但 forward 没有传入 schedule_x")
            s = self.schedule_encoder(schedule_x)
            # Fuse in hidden space rather than mixing raw features whose
            # scales differ widely.
            fused = torch.cat([p, s], dim=-1)
        else:
            fused = p

        # The trunk learns the overall curve form jointly determined by
        # parameters and schedule.
        trunk_feat = self.trunk(fused)

        # Heads are evaluated in a fixed order (level before shape, pressure
        # before derivative before slope) so dropout RNG consumption in
        # training mode is unchanged.
        pressure_level = self.pressure_level_head(trunk_feat)  # [..., 1]
        pressure_shape = self.pressure_shape_head(trunk_feat)  # [..., T]
        # After centering, shape only expresses relative form; the vertical
        # offset is left to the level head (broadcast over the last axis).
        pressure_pred = pressure_level + self.center_shape(pressure_shape)

        derivative_level = self.derivative_level_head(trunk_feat)  # [..., 1]
        derivative_shape = self.derivative_shape_head(trunk_feat)  # [..., T]
        derivative_pred = derivative_level + self.center_shape(derivative_shape)

        slope_pred = self.slope_head(trunk_feat)  # [..., T]

        # Concatenate along the feature axis, consistent with the branch fusion.
        return torch.cat([pressure_pred, derivative_pred, slope_pred], dim=-1)