from __future__ import annotations

from typing import Any

import numpy as np

# Some physical parameters span several orders of magnitude, so they are
# feature-transformed first and only then handed to the model for learning.
DEFAULT_PARAM_NAMES = ["k", "skin", "wellboreC", "phi", "h", "Cf"]
DEFAULT_LOG_PARAM_NAMES = {"k", "wellboreC", "h"}
DEFAULT_ASINH_PARAM_NAMES = {"skin"}
DEFAULT_COMPOSITE_FEATURES = [
    "log10_kh",
    "log10_phi_h",
    "log10_k_over_phi",
    "log10_wellboreC_over_h",
    "log10_wellboreC_over_phi_h",
]


def _decode_names(raw_names: Any) -> list[str] | None:
    """Return the items of *raw_names* as a list of ``str``.

    ``bytes`` / ``np.bytes_`` items are decoded as UTF-8; every other item is
    passed through ``str()``. ``None`` is returned unchanged.
    """
    if raw_names is None:
        return None
    return [
        item.decode("utf-8") if isinstance(item, (bytes, np.bytes_)) else str(item)
        for item in raw_names
    ]


def build_param_feature_transform(
    param_names: list[str] | None = None,
    log_param_names: set[str] | None = None,
    asinh_param_names: set[str] | None = None,
    enabled: bool = True,
    include_composite_features: bool = True,
) -> dict[str, Any]:
    """Describe how raw physical parameters map to model input features.

    Args:
        param_names: Column names of the raw parameter matrix. ``None`` or an
            empty list falls back to ``DEFAULT_PARAM_NAMES``.
        log_param_names: Names transformed with ``log10``. ``None`` selects
            ``DEFAULT_LOG_PARAM_NAMES``.
        asinh_param_names: Names transformed with ``asinh``. ``None`` selects
            ``DEFAULT_ASINH_PARAM_NAMES``.
        enabled: When false, every parameter is mapped with ``"identity"`` and
            no composite features are listed.
        include_composite_features: When true (and ``enabled``), the entries of
            ``DEFAULT_COMPOSITE_FEATURES`` are appended to the feature list.

    Returns:
        A plain ``dict`` with the keys ``enabled``, ``param_names``,
        ``feature_names``, ``transforms``, ``log_param_names``,
        ``asinh_param_names``, ``composite_features`` and ``log_eps``.
    """
    names = list(param_names or DEFAULT_PARAM_NAMES)
    log_names = set(DEFAULT_LOG_PARAM_NAMES if log_param_names is None else log_param_names)
    asinh_names = set(
        DEFAULT_ASINH_PARAM_NAMES if asinh_param_names is None else asinh_param_names
    )

    feature_names: list[str] = []
    transforms: dict[str, str] = {}
    for name in names:
        # log10 takes precedence when a name is listed in both sets.
        if enabled and name in log_names:
            transforms[name] = "log10"
            feature_names.append(f"log10_{name}")
        elif enabled and name in asinh_names:
            transforms[name] = "asinh"
            feature_names.append(f"asinh_{name}")
        else:
            transforms[name] = "identity"
            feature_names.append(name)

    composite_features = (
        list(DEFAULT_COMPOSITE_FEATURES) if (enabled and include_composite_features) else []
    )
    feature_names.extend(composite_features)

    return {
        "enabled": bool(enabled),
        "param_names": names,
        "feature_names": feature_names,
        "transforms": transforms,
        "log_param_names": sorted(log_names),
        "asinh_param_names": sorted(asinh_names),
        "composite_features": composite_features,
        "log_eps": 1.0e-30,
    }


def param_feature_transform_from_meta(meta: dict[str, Any] | None) -> dict[str, Any] | None:
    """Return a shallow copy of ``meta["param_feature_transform"]``.

    Returns ``None`` when *meta* is empty/``None`` or the key is missing or
    holds ``None``.
    """
    if not meta:
        return None
    transform = meta.get("param_feature_transform")
    if transform is None:
        return None
    return dict(transform)


def _composite_feature_columns(
    raw: np.ndarray,
    names: list[str],
    composite_features: list[str],
    log_eps: float,
) -> list[np.ndarray]:
    """Compute the requested composite log10 features from raw parameters.

    *raw* is the untransformed float64 parameter matrix whose columns are
    labelled by *names*. A parameter that is not present in *names* is replaced
    by a NaN column, so composites depending on it come out as NaN. Requested
    names that are not known composites are skipped.
    """
    name_to_col = {name: idx for idx, name in enumerate(names)}

    def value(name: str) -> np.ndarray:
        idx = name_to_col.get(name)
        if idx is None:
            return np.full((raw.shape[0],), np.nan, dtype=np.float64)
        return raw[:, idx]

    k = value("k")
    phi = value("phi")
    wellboreC = value("wellboreC")
    h = value("h")

    # Both denominators and final arguments are clamped to log_eps so that
    # log10 never sees zero or a negative number.
    composite_map = {
        "log10_kh": np.log10(np.maximum(k * h, log_eps)),
        "log10_phi_h": np.log10(np.maximum(phi * h, log_eps)),
        "log10_k_over_phi": np.log10(np.maximum(k / np.maximum(phi, log_eps), log_eps)),
        "log10_wellboreC_over_h": np.log10(
            np.maximum(wellboreC / np.maximum(h, log_eps), log_eps)
        ),
        "log10_wellboreC_over_phi_h": np.log10(
            np.maximum(wellboreC / np.maximum(phi * h, log_eps), log_eps)
        ),
    }
    return [composite_map[name] for name in composite_features if name in composite_map]


def transform_param_features(
    params: np.ndarray,
    transform: dict[str, Any] | None,
) -> np.ndarray:
    """Apply the parameter feature transform before standardization.

    Args:
        params: 2D array-like of raw parameters, one column per entry of
            ``transform["param_names"]``.
        transform: Transform description as produced by
            ``build_param_feature_transform``. ``None`` or a description whose
            ``"enabled"`` flag is false returns the input cast to float32.

    Returns:
        A float32 array holding the per-parameter transformed columns followed
        by one column per recognised composite feature.

    Raises:
        ValueError: If *params* is not 2D, the column count does not match the
            parameter names, or a transform mode is unknown.
    """
    # Keep the caller's precision here: casting to float32 first would turn
    # values above the float32 range into inf (and lose digits) before log10.
    arr = np.asarray(params)
    if arr.ndim != 2:
        raise ValueError(f"params must be a 2D array, got shape={arr.shape}")
    if transform is None or not bool(transform.get("enabled", False)):
        return arr.astype(np.float32, copy=False)

    names = list(transform.get("param_names") or DEFAULT_PARAM_NAMES)
    transforms = dict(transform.get("transforms") or {})
    if arr.shape[1] != len(names):
        raise ValueError(
            f"param feature transform expects {len(names)} columns, got {arr.shape[1]}"
        )

    # All math runs in float64; only the final result is narrowed to float32.
    raw = arr.astype(np.float64, copy=True)
    out = raw.copy()
    log_eps = float(transform.get("log_eps", 1.0e-30))

    for col, name in enumerate(names):
        mode = str(transforms.get(name, "identity")).lower()
        if mode == "log10":
            # Non-positive values are clamped to log_eps before the log.
            out[:, col] = np.log10(np.maximum(out[:, col], log_eps))
        elif mode == "asinh":
            out[:, col] = np.arcsinh(out[:, col])
        elif mode == "identity":
            continue
        else:
            raise ValueError(f"Unknown transform mode for {name}: {mode}")

    composite_features = list(transform.get("composite_features") or [])
    if composite_features:
        # Composites are built from the untransformed values in ``raw``.
        extras = _composite_feature_columns(raw, names, composite_features, log_eps)
        if extras:
            out = np.concatenate([out, np.stack(extras, axis=1)], axis=1)

    return out.astype(np.float32)


def inverse_transform_param_features(
    features: np.ndarray,
    transform: dict[str, Any] | None,
) -> np.ndarray:
    """Restore transformed parameter features back to physical quantities.

    Only the first ``len(param_names)`` columns are inverted; any trailing
    columns (e.g. composite features) are dropped from the result.

    Args:
        features: 2D array-like of transformed features.
        transform: Transform description as produced by
            ``build_param_feature_transform``. ``None`` or a description whose
            ``"enabled"`` flag is false returns the input cast to float32.

    Returns:
        A float32 array with one column per parameter name.

    Raises:
        ValueError: If *features* is not 2D, has fewer columns than there are
            parameter names, or a transform mode is unknown.
    """
    # As in the forward transform, avoid narrowing to float32 before the math:
    # 10 ** x amplifies any rounding of x.
    arr = np.asarray(features)
    if arr.ndim != 2:
        raise ValueError(f"features must be a 2D array, got shape={arr.shape}")
    if transform is None or not bool(transform.get("enabled", False)):
        return arr.astype(np.float32, copy=False)

    names = list(transform.get("param_names") or DEFAULT_PARAM_NAMES)
    transforms = dict(transform.get("transforms") or {})
    if arr.shape[1] < len(names):
        raise ValueError(
            f"param inverse transform expects at least {len(names)} columns, got {arr.shape[1]}"
        )

    out = arr[:, : len(names)].astype(np.float64, copy=True)
    for col, name in enumerate(names):
        mode = str(transforms.get(name, "identity")).lower()
        if mode == "log10":
            out[:, col] = 10.0 ** out[:, col]
        elif mode == "asinh":
            out[:, col] = np.sinh(out[:, col])
        elif mode == "identity":
            continue
        else:
            raise ValueError(f"Unknown transform mode for {name}: {mode}")

    return out.astype(np.float32)