|
|
# -*- coding: utf-8 -*-
|
|
|
"""物理参数特征变换。
|
|
|
|
|
|
数值试井物理参数的量纲和数量级差异很大,例如渗透率、井筒储集系数和地层厚度
|
|
|
可能跨越多个数量级,而 skin 又可能为负值。直接把原始参数送入神经网络会增加
|
|
|
标准化和优化难度。
|
|
|
|
|
|
本模块把原始物理参数变换为更适合模型学习的特征空间,并保存可逆的 transform
|
|
|
元数据。训练后如果需要回到物理尺度,可以使用 inverse_transform_param_features
|
|
|
恢复原始参数含义。
|
|
|
"""
|
|
|
|
|
|
# pylint: disable=too-many-locals,invalid-name
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
from typing import Any
|
|
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
|
# Some physical parameters span several orders of magnitude, so they are
# feature-transformed first and only then handed to the model.

# Default column order of the raw parameter matrix.
DEFAULT_PARAM_NAMES = ["k", "skin", "wellboreC", "phi", "h", "Cf"]

# Parameters that receive a log10 transform by default.
DEFAULT_LOG_PARAM_NAMES = {"k", "wellboreC", "h"}

# Parameters that receive an asinh transform by default.
DEFAULT_ASINH_PARAM_NAMES = {"skin"}

# Names of the composite features appended after the per-parameter columns.
DEFAULT_COMPOSITE_FEATURES = [
    "log10_kh",
    "log10_phi_h",
    "log10_k_over_phi",
    "log10_wellboreC_over_h",
    "log10_wellboreC_over_phi_h",
]
|
|
|
|
|
|
|
|
|
def _decode_names(raw_names: Any) -> list[str] | None:
|
|
|
"""把 HDF5 元数据中的参数名列表解码为字符串列表。"""
|
|
|
if raw_names is None:
|
|
|
return None
|
|
|
return [
|
|
|
item.decode("utf-8") if isinstance(item, (bytes, np.bytes_)) else str(item)
|
|
|
for item in raw_names
|
|
|
]
|
|
|
|
|
|
|
|
|
def build_param_feature_transform(
    param_names: list[str] | None = None,
    log_param_names: set[str] | None = None,
    asinh_param_names: set[str] | None = None,
    enabled: bool = True,
    include_composite_features: bool = True,
) -> dict[str, Any]:
    """Build the forward/inverse transform configuration from parameter names.

    Each parameter is assigned one mode: ``"log10"`` when it is listed in the
    log set, ``"asinh"`` when it is listed in the asinh set, ``"identity"``
    otherwise (and always ``"identity"`` when ``enabled`` is false). The log
    set takes precedence when a name appears in both sets.

    Args:
        param_names: Ordered raw parameter names; the module default is used
            when this is ``None`` or empty.
        log_param_names: Names to transform with log10; ``None`` selects the
            module default.
        asinh_param_names: Names to transform with asinh; ``None`` selects
            the module default.
        enabled: Whether any non-identity transform is applied.
        include_composite_features: Whether the default composite feature
            names are appended (only honoured when ``enabled`` is true).

    Returns:
        A dict with the keys ``enabled``, ``param_names``, ``feature_names``,
        ``transforms``, ``log_param_names``, ``asinh_param_names``,
        ``composite_features`` and ``log_eps``.
    """
    ordered_names = list(param_names or DEFAULT_PARAM_NAMES)
    log_set = set(log_param_names if log_param_names is not None else DEFAULT_LOG_PARAM_NAMES)
    asinh_set = set(asinh_param_names if asinh_param_names is not None else DEFAULT_ASINH_PARAM_NAMES)

    def pick_mode(name: str) -> str:
        """Return the transform mode assigned to one raw parameter."""
        if not enabled:
            return "identity"
        if name in log_set:
            return "log10"
        if name in asinh_set:
            return "asinh"
        return "identity"

    mode_by_name: dict[str, str] = {name: pick_mode(name) for name in ordered_names}

    # The feature name of a single parameter carries its transform as a
    # prefix, so the meaning of every column can be looked up after training.
    column_names: list[str] = [
        name if mode_by_name[name] == "identity" else f"{mode_by_name[name]}_{name}"
        for name in ordered_names
    ]

    # Composite features encode common well-test combinations such as kh,
    # k/phi and ratios related to wellbore storage.
    composites: list[str] = []
    if enabled and include_composite_features:
        composites = list(DEFAULT_COMPOSITE_FEATURES)
    column_names += composites

    config: dict[str, Any] = {}
    config["enabled"] = bool(enabled)
    config["param_names"] = ordered_names
    config["feature_names"] = column_names
    config["transforms"] = mode_by_name
    config["log_param_names"] = sorted(log_set)
    config["asinh_param_names"] = sorted(asinh_set)
    config["composite_features"] = composites
    config["log_eps"] = 1.0e-30
    return config
|
|
|
|
|
|
|
|
|
def param_feature_transform_from_meta(meta: dict[str, Any] | None) -> dict[str, Any] | None:
    """Recover the parameter feature transform config from preprocessing metadata.

    Args:
        meta: Preprocessing metadata; may be ``None`` or empty.

    Returns:
        A shallow copy of ``meta["param_feature_transform"]``, or ``None``
        when ``meta`` is falsy or the entry is absent or ``None``.
    """
    stored = meta.get("param_feature_transform") if meta else None
    # Hand back a copy so callers cannot rebind keys on the stored mapping.
    return None if stored is None else dict(stored)
|
|
|
|
|
|
|
|
|
def _clamped_log10(values: np.ndarray, log_eps: float) -> np.ndarray:
    """Return log10 of ``values`` after flooring them at ``log_eps``."""
    return np.log10(np.maximum(values, log_eps))


def _build_composite_columns(
    raw: np.ndarray,
    names: list[str],
    requested: list[str],
    log_eps: float,
) -> tuple[list[np.ndarray], list[str]]:
    """Compute the requested composite columns from the raw parameter matrix.

    Returns the computed columns (in request order) together with the list of
    requested names that are not recognised composite features.
    """
    name_to_col = {name: idx for idx, name in enumerate(names)}

    def column(name: str) -> np.ndarray:
        """Fetch one raw column by name; NaN when absent so the feature visibly fails."""
        idx = name_to_col.get(name)
        if idx is None:
            return np.full((raw.shape[0],), np.nan, dtype=np.float64)
        return raw[:, idx]

    def floored(values: np.ndarray) -> np.ndarray:
        """Floor a denominator at ``log_eps`` to avoid division by zero."""
        return np.maximum(values, log_eps)

    # Every composite is built from the raw physical values; log10 is applied
    # afterwards. Builders are lazy so only requested features are computed.
    builders = {
        "log10_kh": lambda: column("k") * column("h"),
        "log10_phi_h": lambda: column("phi") * column("h"),
        "log10_k_over_phi": lambda: column("k") / floored(column("phi")),
        "log10_wellboreC_over_h": lambda: column("wellboreC") / floored(column("h")),
        "log10_wellboreC_over_phi_h": lambda: column("wellboreC") / floored(column("phi") * column("h")),
    }
    columns = [_clamped_log10(builders[name](), log_eps) for name in requested if name in builders]
    unknown = [name for name in requested if name not in builders]
    return columns, unknown


def transform_param_features(
    params: np.ndarray,
    transform: dict[str, Any] | None,
) -> np.ndarray:
    """Map raw physical parameters into the feature space used by the network.

    Args:
        params: 2D array-like of raw parameters with one column per entry of
            ``transform["param_names"]``.
        transform: Transform configuration. When it is ``None`` or its
            ``"enabled"`` entry is falsy, the input is only cast to float32.

    Returns:
        A float32 array holding the per-parameter transformed columns followed
        by one column per recognised entry of ``composite_features``.
        Composite features whose source parameter is missing are NaN.

    Raises:
        ValueError: If ``params`` is not 2D, the column count does not match
            the configured parameter names, or a transform mode is unknown.

    Warns:
        RuntimeWarning: If ``composite_features`` contains unrecognised
            names. They are skipped, so the output has fewer columns than the
            configuration lists.
    """
    x = np.asarray(params)
    if x.ndim != 2:
        raise ValueError(f"params must be a 2D array, got shape={x.shape}")

    if transform is None or not bool(transform.get("enabled", False)):
        return x.astype(np.float32, copy=False)

    names = list(transform.get("param_names") or DEFAULT_PARAM_NAMES)
    transforms = dict(transform.get("transforms") or {})
    if x.shape[1] != len(names):
        raise ValueError(f"param feature transform expects {len(names)} columns, got {x.shape[1]}")

    # Promote straight to float64. Rounding through float32 first would turn
    # values outside float32 range into 0 or inf before the log_eps floor and
    # the float64 math get a chance to handle them.
    raw = x.astype(np.float64, copy=True)
    out = raw.copy()
    log_eps = float(transform.get("log_eps", 1.0e-30))

    for col, name in enumerate(names):
        mode = str(transforms.get(name, "identity")).lower()
        if mode == "log10":
            # log10 compresses parameters that span orders of magnitude,
            # easing the load on the scaler and the network.
            out[:, col] = _clamped_log10(out[:, col], log_eps)
        elif mode == "asinh":
            # skin may be negative; asinh keeps the sign and behaves like a
            # logarithm for large magnitudes.
            out[:, col] = np.arcsinh(out[:, col])
        elif mode != "identity":
            raise ValueError(f"Unknown transform mode for {name}: {mode}")

    requested = list(transform.get("composite_features") or [])
    if requested:
        extras, unknown = _build_composite_columns(raw, names, requested, log_eps)
        if unknown:
            import warnings  # local import keeps this block self-contained

            # Skipped names make the output narrower than the configured
            # feature list; surface that instead of dropping them silently.
            warnings.warn(
                f"Ignoring unknown composite features: {unknown}",
                RuntimeWarning,
                stacklevel=2,
            )
        if extras:
            out = np.concatenate([out, np.stack(extras, axis=1)], axis=1)

    return out.astype(np.float32)
|
|
|
|
|
|
|
|
|
def inverse_transform_param_features(
    features: np.ndarray,
    transform: dict[str, Any] | None,
) -> np.ndarray:
    """Map parameters from the model feature space back to the physical scale.

    Args:
        features: 2D array-like whose leading columns are the per-parameter
            features, in the order of ``transform["param_names"]``. Any
            trailing columns are ignored.
        transform: Transform configuration. When it is ``None`` or its
            ``"enabled"`` entry is falsy, the input is only cast to float32.

    Returns:
        A float32 array with one column per configured parameter name.

    Raises:
        ValueError: If ``features`` is not 2D, has fewer columns than there
            are parameter names, or a transform mode is unknown.
    """
    feats = np.asarray(features, dtype=np.float32)
    if feats.ndim != 2:
        raise ValueError(f"features must be a 2D array, got shape={feats.shape}")

    is_active = transform is not None and bool(transform.get("enabled", False))
    if not is_active:
        return feats.astype(np.float32, copy=False)

    param_names = list(transform.get("param_names") or DEFAULT_PARAM_NAMES)
    mode_by_name = dict(transform.get("transforms") or {})
    n_params = len(param_names)
    if feats.shape[1] < n_params:
        raise ValueError(f"param inverse transform expects at least {n_params} columns, got {feats.shape[1]}")

    # Only the leading per-parameter columns are inverted.
    restored = feats[:, :n_params].astype(np.float64, copy=True)

    # Inverse of every non-identity forward mode.
    inverse_ops = {
        "log10": lambda values: 10.0 ** values,
        "asinh": np.sinh,
    }
    for col, name in enumerate(param_names):
        mode = str(mode_by_name.get(name, "identity")).lower()
        if mode == "identity":
            continue
        undo = inverse_ops.get(mode)
        if undo is None:
            raise ValueError(f"Unknown transform mode for {name}: {mode}")
        restored[:, col] = undo(restored[:, col])

    return restored.astype(np.float32)
|