1、移除时间条件模型 2、移除不确定性量化相关代码 3、移除160维slope 4、整理代码

feature/ModelOpt20260526
1294271022 5 hours ago
parent fabffd56a1
commit 88b47c3359

@ -59,7 +59,6 @@ curve_processing: # 双对数曲线清洗与重采样设置
n_time_points: 160 # 每条双对数曲线统一重采样的时间点数
min_valid_points: 40 # 有效曲线至少需要的原始点数
feature_epsilon: 1.0e-12 # 取对数和除法时使用的最小保护值
use_slope_feature: true # 是否额外拼接压力曲线斜率特征
max_time_cap: 3000.0 # 曲线重采样允许的最大时间上限
min_time_floor: 1.0e-6 # 曲线重采样允许的最小时间下限
fixed_time_range: null # 固定重采样时间范围null 表示按样本自身范围

@ -59,7 +59,6 @@ curve_processing: # 双对数曲线清洗与重采样设置
n_time_points: 160 # 每条双对数曲线统一重采样的时间点数
min_valid_points: 40 # 有效曲线至少需要的原始点数
feature_epsilon: 1.0e-12 # 取对数和除法时使用的最小保护值
use_slope_feature: true # 是否额外拼接压力曲线斜率特征
max_time_cap: 3000.0 # 曲线重采样允许的最大时间上限
min_time_floor: 1.0e-6 # 曲线重采样允许的最小时间下限
fixed_time_range: null # 固定重采样时间范围null 表示按样本自身范围

@ -59,7 +59,6 @@ curve_processing: # 双对数曲线清洗与重采样设置
n_time_points: 160 # 每条双对数曲线统一重采样的时间点数
min_valid_points: 40 # 有效曲线至少需要的原始点数
feature_epsilon: 1.0e-12 # 取对数和除法时使用的最小保护值
use_slope_feature: true # 是否额外拼接压力曲线斜率特征
max_time_cap: 3000.0 # 曲线重采样允许的最大时间上限
min_time_floor: 1.0e-6 # 曲线重采样允许的最小时间下限
fixed_time_range: null # 固定重采样时间范围null 表示按样本自身范围

@ -77,7 +77,6 @@ curve_processing: # 双对数曲线清洗与重采样设置
n_time_points: 160 # 每条双对数曲线统一重采样的时间点数
min_valid_points: 40 # 有效曲线至少需要的原始点数
feature_epsilon: 1.0e-12 # 取对数和除法时使用的最小保护值
use_slope_feature: true # 是否额外拼接压力曲线斜率特征
max_time_cap: 3000.0 # 曲线重采样允许的最大时间上限
min_time_floor: 1.0e-6 # 曲线重采样允许的最小时间下限
fixed_time_range: null # 固定重采样时间范围null 表示按样本自身范围

@ -77,7 +77,6 @@ curve_processing: # 双对数曲线清洗与重采样设置
n_time_points: 160 # 每条双对数曲线统一重采样的时间点数
min_valid_points: 40 # 有效曲线至少需要的原始点数
feature_epsilon: 1.0e-12 # 取对数和除法时使用的最小保护值
use_slope_feature: true # 是否额外拼接压力曲线斜率特征
max_time_cap: 3000.0 # 曲线重采样允许的最大时间上限
min_time_floor: 1.0e-6 # 曲线重采样允许的最小时间下限
fixed_time_range: null # 固定重采样时间范围null 表示按样本自身范围

@ -1,455 +0,0 @@
"""
分析集成代理模型不确定性结果
评估 uncertainty 是否能够识别高误差样本
并生成 retention curve风险样本统计和 summary 输出
"""
from __future__ import annotations
import argparse
import csv
import json
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any
import matplotlib.pyplot as plt
import numpy as np
# 获取当前脚本所在目录的上一级项目根目录。
# 这里假设脚本位于项目的某个子目录中,因此 parents[1] 指向项目根目录。
ROOT = Path(__file__).resolve().parents[1]
# 将项目根目录加入 Python 搜索路径,方便后续导入项目内部模块。
# 当前脚本虽然没有直接导入内部模块,但保留该设置可以兼容项目结构。
sys.path.append(str(ROOT))
Row = dict[str, str]
OutputRow = dict[str, Any]
@dataclass(frozen=True)
class Metrics:
"""保存样本级误差和不确定性指标,减少 main 函数中的局部变量数量。"""
rmse: np.ndarray
mae: np.ndarray
r2: np.ndarray
unc: np.ndarray
@dataclass(frozen=True)
class UncertaintyThresholds:
"""保存低/高不确定性阈值。"""
low: float
high: float
@dataclass(frozen=True)
class RiskRows:
"""保存几类需要导出的风险样本。"""
top_uncertain: list[Row]
high_error_high_unc: list[Row]
high_error_low_unc: list[Row]
def parse_args() -> argparse.Namespace:
"""解析不确定性评估结果 CSV、输出目录以及分位数分箱参数。"""
parser = argparse.ArgumentParser(
description="Analyze ensemble UQ outputs for fallback usefulness"
)
parser.add_argument(
"--input-csv",
type=str,
default=None,
help="Path to sample_uncertainty_metrics.csv",
)
parser.add_argument(
"--output-dir",
type=str,
default=None,
help="Optional output directory",
)
parser.add_argument(
"--tag",
type=str,
default="family_random_50k",
help="Experiment tag",
)
parser.add_argument(
"--quantiles",
type=str,
default="0,5,10,20,30,40,50",
help="Comma-separated uncertainty removal percentages",
)
parser.add_argument(
"--top-k",
type=int,
default=100,
help="Top-K risky samples to export",
)
parser.add_argument(
"--high-error-rmse",
type=float,
default=1.0,
help="High-error RMSE threshold",
)
parser.add_argument(
"--low-unc-quantile",
type=float,
default=50.0,
help="Low uncertainty quantile",
)
parser.add_argument(
"--high-unc-quantile",
type=float,
default=90.0,
help="High uncertainty quantile",
)
return parser.parse_args()
def parse_quantiles(text: str) -> list[float]:
"""解析逗号分隔的剔除比例,用于不确定性保留曲线分析。"""
values = []
for item in str(text).split(","):
item = item.strip()
if not item:
continue
q = float(item)
if q < 0 or q >= 100:
raise ValueError(f"非法 quantile 百分比: {q}")
values.append(q)
if 0.0 not in values:
values = [0.0] + values
return sorted(set(values))
def default_input_csv(tag: str) -> Path:
"""根据实验标签定位集成评估生成的 sample_uncertainty_metrics.csv。"""
return Path("results") / f"evaluation_{tag}_ensemble_uq" / "sample_uncertainty_metrics.csv"
def default_output_dir(tag: str) -> Path:
"""根据实验标签生成当前分析脚本默认的输出目录。"""
return Path("results") / f"evaluation_{tag}_ensemble_uq_analysis"
def resolve_paths(args: argparse.Namespace) -> tuple[Path, Path]:
"""根据命令行参数解析输入 CSV 和输出目录。"""
tag = str(args.tag).strip()
input_csv = Path(args.input_csv) if args.input_csv else default_input_csv(tag)
output_dir = Path(args.output_dir) if args.output_dir else default_output_dir(tag)
return input_csv, output_dir
def load_rows(csv_path: Path) -> list[Row]:
"""读取 CSV 为字典行列表,保留每个样本的不确定性和误差字段。"""
with open(csv_path, "r", encoding="utf-8-sig", newline="") as file:
rows = list(csv.DictReader(file))
if not rows:
raise ValueError(f"CSV 没有数据: {csv_path}")
return rows
def as_float(rows: list[Row], key: str) -> np.ndarray:
"""将 CSV 字段转为 float 数组,用于后续统计分析。"""
return np.array([float(row[key]) for row in rows], dtype=np.float64)
def extract_metrics(rows: list[Row]) -> Metrics:
"""从 CSV 行中提取样本级误差和不确定性指标。"""
return Metrics(
rmse=as_float(rows, "overall_rmse"),
mae=as_float(rows, "overall_mae"),
r2=as_float(rows, "overall_r2"),
unc=as_float(rows, "unc_mean_std"),
)
def safe_mean(x: np.ndarray) -> float:
"""计算均值;没有有效数据时返回 NaN。"""
return float(np.mean(x)) if x.size else np.nan
def safe_median(x: np.ndarray) -> float:
"""计算中位数;没有有效数据时返回 NaN。"""
return float(np.median(x)) if x.size else np.nan
def safe_percentile(x: np.ndarray, q: float) -> float:
"""计算百分位数;没有有效数据时返回 NaN。"""
return float(np.percentile(x, q)) if x.size else np.nan
def save_csv(path: Path, rows: list[OutputRow] | list[Row]) -> None:
"""把字典行写入 CSV。"""
if not rows:
return
with open(path, "w", encoding="utf-8-sig", newline="") as file:
writer = csv.DictWriter(file, fieldnames=list(rows[0].keys()))
writer.writeheader()
writer.writerows(rows)
def build_thresholds(
unc: np.ndarray,
low_unc_quantile: float,
high_unc_quantile: float,
) -> UncertaintyThresholds:
"""根据样本不确定性分布计算低/高不确定性阈值。"""
return UncertaintyThresholds(
low=float(np.percentile(unc, low_unc_quantile)),
high=float(np.percentile(unc, high_unc_quantile)),
)
def build_retention_curve(metrics: Metrics, quantiles: list[float]) -> list[OutputRow]:
"""构造按不确定性由高到低剔除样本后的误差统计曲线。"""
curve_rows: list[OutputRow] = []
for removed_pct in quantiles:
threshold = float(np.percentile(metrics.unc, 100.0 - removed_pct))
keep_mask = metrics.unc <= threshold
kept_rmse = metrics.rmse[keep_mask]
kept_mae = metrics.mae[keep_mask]
kept_r2 = metrics.r2[keep_mask]
curve_rows.append(
{
"removed_pct": float(removed_pct),
"retained_pct": float(100.0 * np.mean(keep_mask)),
"n_retained": int(np.sum(keep_mask)),
"rmse_mean": safe_mean(kept_rmse),
"rmse_median": safe_median(kept_rmse),
"rmse_p90": safe_percentile(kept_rmse, 90),
"mae_mean": safe_mean(kept_mae),
"mae_median": safe_median(kept_mae),
"r2_mean": safe_mean(kept_r2),
"r2_median": safe_median(kept_r2),
"unc_threshold": threshold,
}
)
return curve_rows
def select_top_uncertain_rows(rows: list[Row], unc: np.ndarray, top_k: int) -> list[Row]:
"""按不确定性从大到小导出 Top-K 样本。"""
top_uncertain_indices = np.argsort(-unc)[: min(top_k, len(rows))]
return [rows[int(index)] for index in top_uncertain_indices]
def classify_risky_rows(
rows: list[Row],
thresholds: UncertaintyThresholds,
high_error_rmse: float,
) -> tuple[list[Row], list[Row]]:
"""识别高误差高不确定性样本,以及高误差低不确定性样本。"""
high_error_high_unc_rows = []
high_error_low_unc_rows = []
for row in rows:
row_rmse = float(row["overall_rmse"])
row_unc = float(row["unc_mean_std"])
if row_rmse >= high_error_rmse and row_unc >= thresholds.high:
high_error_high_unc_rows.append(row)
if row_rmse >= high_error_rmse and row_unc <= thresholds.low:
high_error_low_unc_rows.append(row)
high_error_high_unc_rows.sort(
key=lambda row: float(row["overall_rmse"]),
reverse=True,
)
high_error_low_unc_rows.sort(
key=lambda row: float(row["overall_rmse"]),
reverse=True,
)
return high_error_high_unc_rows, high_error_low_unc_rows
def build_risk_rows(
rows: list[Row],
metrics: Metrics,
thresholds: UncertaintyThresholds,
high_error_rmse: float,
top_k: int,
) -> RiskRows:
"""构造所有需要导出的风险样本集合。"""
high_error_high_unc_rows, high_error_low_unc_rows = classify_risky_rows(
rows=rows,
thresholds=thresholds,
high_error_rmse=high_error_rmse,
)
return RiskRows(
top_uncertain=select_top_uncertain_rows(rows, metrics.unc, top_k),
high_error_high_unc=high_error_high_unc_rows,
high_error_low_unc=high_error_low_unc_rows,
)
def build_summary(
input_csv: Path,
metrics: Metrics,
thresholds: UncertaintyThresholds,
risk_rows: RiskRows,
args: argparse.Namespace,
) -> dict[str, Any]:
"""汇总全局统计信息和关键阈值,方便后续写入 JSON。"""
return {
"input_csv": str(input_csv),
"n_samples": int(metrics.rmse.size),
"global": {
"rmse_mean": safe_mean(metrics.rmse),
"rmse_median": safe_median(metrics.rmse),
"rmse_p90": safe_percentile(metrics.rmse, 90),
"mae_mean": safe_mean(metrics.mae),
"r2_mean": safe_mean(metrics.r2),
"unc_mean": safe_mean(metrics.unc),
"unc_median": safe_median(metrics.unc),
"unc_p90": safe_percentile(metrics.unc, 90),
},
"thresholds": {
"high_error_rmse": float(args.high_error_rmse),
"low_unc_quantile": float(args.low_unc_quantile),
"low_unc_threshold": thresholds.low,
"high_unc_quantile": float(args.high_unc_quantile),
"high_unc_threshold": thresholds.high,
},
"counts": {
"top_uncertain_exported": len(risk_rows.top_uncertain),
"high_error_high_unc": len(risk_rows.high_error_high_unc),
"high_error_low_unc": len(risk_rows.high_error_low_unc),
},
}
def plot_retention_curve(curve_rows: list[OutputRow], output_path: Path) -> None:
"""绘制按不确定性由高到低剔除样本后的误差变化。"""
removed = np.array(
[float(row["removed_pct"]) for row in curve_rows],
dtype=np.float64,
)
retained = np.array(
[float(row["retained_pct"]) for row in curve_rows],
dtype=np.float64,
)
rmse = np.array([float(row["rmse_mean"]) for row in curve_rows], dtype=np.float64)
mae = np.array([float(row["mae_mean"]) for row in curve_rows], dtype=np.float64)
r2 = np.array([float(row["r2_mean"]) for row in curve_rows], dtype=np.float64)
fig, axes = plt.subplots(1, 3, figsize=(15, 4.5))
axes[0].plot(retained, rmse, marker="o")
axes[0].set_title("Retained vs RMSE")
axes[0].set_xlabel("Retained Samples (%)")
axes[0].set_ylabel("RMSE mean")
axes[0].grid(True, alpha=0.3)
axes[1].plot(retained, mae, marker="o")
axes[1].set_title("Retained vs MAE")
axes[1].set_xlabel("Retained Samples (%)")
axes[1].set_ylabel("MAE mean")
axes[1].grid(True, alpha=0.3)
axes[2].plot(retained, r2, marker="o")
axes[2].set_title("Retained vs R2")
axes[2].set_xlabel("Retained Samples (%)")
axes[2].set_ylabel("R2 mean")
axes[2].grid(True, alpha=0.3)
removed_grid = ",".join(str(int(value)) for value in removed)
fig.suptitle(f"Uncertainty Filtering Curves | removed grid={removed_grid}%")
plt.tight_layout(rect=[0, 0, 1, 0.94])
plt.savefig(output_path, dpi=150, bbox_inches="tight")
plt.close()
def save_outputs(
output_dir: Path,
curve_rows: list[OutputRow],
risk_rows: RiskRows,
summary: dict[str, Any],
) -> None:
"""保存 JSON、CSV 和不确定性过滤曲线图。"""
with open(output_dir / "uq_fallback_summary.json", "w", encoding="utf-8") as file:
json.dump(summary, file, ensure_ascii=False, indent=2)
save_csv(output_dir / "uncertainty_filter_curve.csv", curve_rows)
save_csv(output_dir / "top_uncertain_samples.csv", risk_rows.top_uncertain)
save_csv(output_dir / "high_error_high_unc_samples.csv", risk_rows.high_error_high_unc)
save_csv(output_dir / "high_error_low_unc_samples.csv", risk_rows.high_error_low_unc)
plot_retention_curve(curve_rows, output_dir / "uncertainty_filter_curve.png")
def print_summary(output_dir: Path, summary: dict[str, Any]) -> None:
"""在控制台打印简要结果,便于命令行运行后快速判断分析是否完成。"""
print("UQ fallback analysis complete.")
print(f"Output dir: {output_dir}")
print(
f"Global RMSE mean={summary['global']['rmse_mean']:.6f}, "
f"unc mean={summary['global']['unc_mean']:.6f}"
)
print(
f"High-error & high-unc count={summary['counts']['high_error_high_unc']}, "
f"high-error & low-unc count={summary['counts']['high_error_low_unc']}"
)
def main() -> None:
"""分析集成不确定性与真实误差的相关性,并生成分析产物。"""
args = parse_args()
input_csv, output_dir = resolve_paths(args)
output_dir.mkdir(parents=True, exist_ok=True)
rows = load_rows(input_csv)
metrics = extract_metrics(rows)
quantiles = parse_quantiles(args.quantiles)
thresholds = build_thresholds(
unc=metrics.unc,
low_unc_quantile=float(args.low_unc_quantile),
high_unc_quantile=float(args.high_unc_quantile),
)
curve_rows = build_retention_curve(metrics, quantiles)
risk_rows = build_risk_rows(
rows=rows,
metrics=metrics,
thresholds=thresholds,
high_error_rmse=float(args.high_error_rmse),
top_k=int(args.top_k),
)
summary = build_summary(
input_csv=input_csv,
metrics=metrics,
thresholds=thresholds,
risk_rows=risk_rows,
args=args,
)
save_outputs(output_dir, curve_rows, risk_rows, summary)
print_summary(output_dir, summary)
if __name__ == "__main__":
main()

@ -1,371 +0,0 @@
"""
将集成学习不确定性量化样本指标与处理后的数据集元数据进行关联
该脚本为每个 UQ 指标行补充调度元数据类别标签以及可选的源标签然后输出分组汇总结果和困难样本
"""
from __future__ import annotations
import argparse
import csv
import json
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any
import joblib
import numpy as np
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
def load_experiment_path_helpers():
"""Load project path helpers after adding project root to sys.path."""
# pylint: disable=import-error,import-outside-toplevel
from src.common.experiment_paths import normalize_tag, processed_path_for_tag
return normalize_tag, processed_path_for_tag
normalize_tag_func, processed_path_func = load_experiment_path_helpers()
@dataclass(frozen=True)
class AnalysisPaths:
"""Resolved input and output paths used by the metadata analysis."""
tag: str
processed_path: Path
uq_csv: Path
output_dir: Path
@dataclass(frozen=True)
class ProcessedMetadata:
"""Metadata arrays loaded from the processed dataset."""
schedule_meta_test: np.ndarray
family_name_test: np.ndarray
source_name_test: np.ndarray | None
source_id_test: np.ndarray | None
meta_names: list[str]
name_to_col: dict[str, int]
def parse_args() -> argparse.Namespace:
"""解析 UQ 指标 CSV、processed 数据和输出路径,用于合并样本元数据分析。"""
parser = argparse.ArgumentParser(
description="Join UQ sample metrics with saved metadata"
)
parser.add_argument(
"--processed",
type=str,
default=None,
help="Processed dataset path",
)
parser.add_argument(
"--uq-csv",
type=str,
default=None,
help="sample_uncertainty_metrics.csv path",
)
parser.add_argument(
"--tag",
type=str,
default="family_random_50k",
help="Experiment tag",
)
parser.add_argument(
"--output-dir",
type=str,
default=None,
help="Output directory",
)
parser.add_argument("--high-error-rmse", type=float, default=1.0)
return parser.parse_args()
def default_uq_csv(tag: str) -> Path:
"""根据实验标签定位默认的不确定性指标 CSV。"""
return (
Path("results")
/ f"evaluation_{tag}_ensemble_uq"
/ "sample_uncertainty_metrics.csv"
)
def default_output_dir(tag: str) -> Path:
"""根据实验标签生成当前分析脚本默认的输出目录。"""
return Path("results") / f"evaluation_{tag}_ensemble_uq_metadata_analysis"
def resolve_paths(args: argparse.Namespace) -> AnalysisPaths:
"""根据命令行参数和实验标签解析输入输出路径。"""
tag = normalize_tag_func(args.tag)
fallback_tag = tag or "family_random_50k"
processed_path = (
Path(args.processed)
if args.processed is not None
else processed_path_func(tag)
)
uq_csv = (
Path(args.uq_csv)
if args.uq_csv is not None
else default_uq_csv(fallback_tag)
)
output_dir = (
Path(args.output_dir)
if args.output_dir is not None
else default_output_dir(fallback_tag)
)
return AnalysisPaths(
tag=tag,
processed_path=processed_path,
uq_csv=uq_csv,
output_dir=output_dir,
)
def save_csv(path: Path, rows: list[dict[str, Any]]) -> None:
"""把字典行写入 CSV当没有行时不写文件。"""
if not rows:
return
with open(path, "w", encoding="utf-8-sig", newline="") as f:
writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
writer.writeheader()
writer.writerows(rows)
def load_uq_rows(uq_csv: Path) -> list[dict[str, str]]:
"""读取 sample_uncertainty_metrics.csv。"""
with open(uq_csv, "r", encoding="utf-8-sig", newline="") as f:
rows = list(csv.DictReader(f))
if not rows:
raise ValueError(f"UQ CSV 没有数据: {uq_csv}")
return rows
def load_processed_metadata(processed_path: Path) -> ProcessedMetadata:
"""读取 processed 数据中的测试集元数据。"""
data = joblib.load(processed_path)
required_keys = {"schedule_meta_test", "family_name_test"}
if not required_keys.issubset(data):
raise RuntimeError(
"Processed dataset does not contain schedule metadata. "
"Re-run preprocess on a metadata-rich raw HDF5 first."
)
schedule_meta_test = np.asarray(data["schedule_meta_test"], dtype=np.float32)
family_name_test = np.asarray(data["family_name_test"]).astype(str)
source_name_test = None
source_id_test = None
if "source_name_test" in data:
source_name_test = np.asarray(data["source_name_test"]).astype(str)
if "source_id_test" in data:
source_id_test = np.asarray(data["source_id_test"])
meta_names = data["meta"].get("schedule_meta_names") or []
name_to_col = {name: i for i, name in enumerate(meta_names)}
return ProcessedMetadata(
schedule_meta_test=schedule_meta_test,
family_name_test=family_name_test,
source_name_test=source_name_test,
source_id_test=source_id_test,
meta_names=meta_names,
name_to_col=name_to_col,
)
def enrich_uq_rows(
uq_rows: list[dict[str, str]],
metadata: ProcessedMetadata,
) -> list[dict[str, Any]]:
"""把 UQ 指标与 family/source/schedule metadata 拼接到同一行。"""
enriched_rows: list[dict[str, Any]] = []
for row in uq_rows:
idx = int(row["idx"])
meta_row = metadata.schedule_meta_test[idx]
enriched: dict[str, Any] = dict(row)
enriched["family_name"] = metadata.family_name_test[idx]
if metadata.source_name_test is not None:
enriched["source_name"] = metadata.source_name_test[idx]
if metadata.source_id_test is not None:
enriched["source_id"] = int(metadata.source_id_test[idx])
for name, col in metadata.name_to_col.items():
enriched[name] = float(meta_row[col])
enriched_rows.append(enriched)
return enriched_rows
def summarize_group(
rows: list[dict[str, Any]],
group_key: str,
) -> list[dict[str, Any]]:
"""对一个样本分组计算误差、不确定性和样本数量等汇总指标。"""
groups: dict[str, list[dict[str, Any]]] = {}
for row in rows:
groups.setdefault(str(row[group_key]), []).append(row)
summaries = []
sorted_groups = sorted(
groups.items(),
key=lambda item: len(item[1]),
reverse=True,
)
for key, group_rows in sorted_groups:
rmse = np.array(
[float(row["overall_rmse"]) for row in group_rows],
dtype=np.float64,
)
unc = np.array(
[float(row["unc_mean_std"]) for row in group_rows],
dtype=np.float64,
)
summaries.append(
{
group_key: key,
"n_samples": len(group_rows),
"rmse_mean": float(np.mean(rmse)),
"rmse_median": float(np.median(rmse)),
"unc_mean": float(np.mean(unc)),
"unc_median": float(np.median(unc)),
}
)
return summaries
def save_group_summaries(
output_dir: Path,
enriched_rows: list[dict[str, Any]],
metadata: ProcessedMetadata,
) -> None:
"""按 family/source/n_prod 等维度保存分组统计。"""
save_csv(
output_dir / "summary_by_family.csv",
summarize_group(enriched_rows, "family_name"),
)
if metadata.source_name_test is not None:
save_csv(
output_dir / "summary_by_source.csv",
summarize_group(enriched_rows, "source_name"),
)
if "n_prod" in metadata.name_to_col:
for row in enriched_rows:
row["n_prod_group"] = int(round(float(row["n_prod"])))
save_csv(
output_dir / "summary_by_n_prod.csv",
summarize_group(enriched_rows, "n_prod_group"),
)
def find_high_error_low_unc(
enriched_rows: list[dict[str, Any]],
high_error_rmse: float,
) -> list[dict[str, Any]]:
"""筛出高误差但低不确定性的危险样本。"""
unc_values = np.array(
[float(row["unc_mean_std"]) for row in enriched_rows],
dtype=np.float64,
)
low_unc_threshold = float(np.median(unc_values))
risky_rows = [
row
for row in enriched_rows
if float(row["overall_rmse"]) >= high_error_rmse
and float(row["unc_mean_std"]) <= low_unc_threshold
]
risky_rows.sort(key=lambda row: float(row["overall_rmse"]), reverse=True)
return risky_rows
def write_json_summary(
output_dir: Path,
paths: AnalysisPaths,
metadata: ProcessedMetadata,
enriched_rows: list[dict[str, Any]],
high_error_low_unc: list[dict[str, Any]],
) -> None:
"""保存 metadata join 的整体摘要信息。"""
summary = {
"tag": paths.tag,
"processed_path": str(paths.processed_path),
"uq_csv": str(paths.uq_csv),
"n_samples": len(enriched_rows),
"meta_names": metadata.meta_names,
"has_source_name": bool(metadata.source_name_test is not None),
"high_error_low_unc_count": len(high_error_low_unc),
}
with open(output_dir / "metadata_join_summary.json", "w", encoding="utf-8") as f:
json.dump(summary, f, ensure_ascii=False, indent=2)
def run_analysis(args: argparse.Namespace) -> tuple[Path, int]:
"""执行完整 UQ metadata join 分析流程。"""
paths = resolve_paths(args)
paths.output_dir.mkdir(parents=True, exist_ok=True)
metadata = load_processed_metadata(paths.processed_path)
uq_rows = load_uq_rows(paths.uq_csv)
enriched_rows = enrich_uq_rows(uq_rows, metadata)
save_csv(paths.output_dir / "uq_samples_with_metadata.csv", enriched_rows)
save_group_summaries(paths.output_dir, enriched_rows, metadata)
high_error_low_unc = find_high_error_low_unc(
enriched_rows=enriched_rows,
high_error_rmse=float(args.high_error_rmse),
)
save_csv(
paths.output_dir / "high_error_low_unc_with_metadata.csv",
high_error_low_unc,
)
write_json_summary(
output_dir=paths.output_dir,
paths=paths,
metadata=metadata,
enriched_rows=enriched_rows,
high_error_low_unc=high_error_low_unc,
)
return paths.output_dir, len(high_error_low_unc)
def main() -> None:
"""命令行入口:拼接 UQ 结果和元数据,并输出汇总文件。"""
output_dir, risky_count = run_analysis(parse_args())
print("UQ metadata join analysis complete.")
print(f"Output dir: {output_dir}")
print(f"High-error low-unc count={risky_count}")
if __name__ == "__main__":
main()

@ -41,7 +41,7 @@ from src.common.experiment_paths import (
normalize_tag,
processed_path_for_tag,
)
from src.data.curve_processing import clean_curve_for_dataset, is_valid_curve, resample_curve_to_features
from src.data.curve_processing import clean_curve_for_dataset, is_valid_curve, resample_curve_to_model_features
from src.data.param_features import param_feature_transform_from_meta, transform_param_features
from src.data.params import Params, Schedule
from src.data.runner_client import CppRunner, read_result_bin
@ -164,29 +164,25 @@ def calc_metrics(
def infer_curve_layout(meta: dict, curve_dim: int) -> dict:
"""从元数据读取曲线分段布局;旧数据没有布局时按压力/导数/斜率三等分回退。"""
# curve_layout 描述了拼接曲线的结构布局
# 包括每一段 pressure/derivative/slope 的起止位置
"""从元数据读取布局;缺失时兼容推断双通道或旧三通道布局。"""
curve_layout = meta.get("curve_layout")
if curve_layout is not None:
return curve_layout
# 兼容早期 processed 文件:没有显式 layout 时仍按 pressure/derivative/slope 三段切分。
n_time_points = curve_dim // 3
n_parts = 3 if curve_dim % 3 == 0 else 2
n_time_points = curve_dim // n_parts
names = ["log_pressure", "log_derivative"] + (["slope"] if n_parts == 3 else [])
return {
"n_time_points": int(n_time_points),
"parts": [
{"name": "log_pressure", "start": 0, "end": n_time_points},
{"name": "log_derivative", "start": n_time_points, "end": 2 * n_time_points},
{"name": "slope", "start": 2 * n_time_points, "end": 3 * n_time_points},
{"name": name, "start": i * n_time_points, "end": (i + 1) * n_time_points}
for i, name in enumerate(names)
],
}
def split_curve_by_layout(curve: np.ndarray, layout: dict) -> dict[str, np.ndarray]:
"""按照 curve_layout 将拼接曲线拆成 log_pressure、log_derivative 和 slope 三段。"""
# parts 用于保存拆分后的不同曲线段
# 例如 log_pressure / derivative / slope
"""按照 curve_layout 拆分曲线。"""
parts: dict[str, np.ndarray] = {}
for part in layout["parts"]:
start = int(part["start"])
@ -310,7 +306,12 @@ def build_params_from_args(args: argparse.Namespace, cfg: Config, schedule: Sche
)
def run_solver_and_extract_curve(cfg: Config, params: Params, well_index: int) -> tuple[np.ndarray, dict]:
def run_solver_and_extract_curve(
cfg: Config,
params: Params,
well_index: int,
target_curve_dim: int,
) -> tuple[np.ndarray, dict]:
"""调用 C++ 求解器运行一次正演,并把双对数输出重采样为模型曲线向量。"""
# 创建 C++ 求解器客户端
# 实际底层会调用数值试井求解程序
@ -351,7 +352,9 @@ def run_solver_and_extract_curve(cfg: Config, params: Params, well_index: int) -
# 将原始不规则时间曲线重采样到固定特征网格
# 这是代理模型训练时使用的统一输入格式
curve_feat = resample_curve_to_features(cfg, t_clean, p_clean, d_clean)
curve_feat = resample_curve_to_model_features(
cfg, t_clean, p_clean, d_clean, target_curve_dim
)
raw = {
"t": t_clean.tolist(),
"p": p_clean.tolist(),
@ -449,16 +452,15 @@ def plot_comparison(
# 用于衡量双对数曲线形态是否一致
autofit = dual_log_objective(curve_true, curve_pred, curve_layout)
part_names = ["log_pressure", "log_derivative", "slope"]
part_names = [str(part["name"]) for part in curve_layout["parts"]]
title_map = {
"log_pressure": "Log Pressure",
"log_derivative": "Log |Derivative|",
"slope": "Slope of Log Pressure vs Log Time",
}
r2_text = "nan" if np.isnan(overall["r2"]) else f"{overall['r2']:.4f}"
fig, axes = plt.subplots(3, 2, figsize=(14, 12))
fig, axes = plt.subplots(len(part_names), 2, figsize=(14, 4 * len(part_names)))
fig.suptitle(
"Solver vs Surrogate\n"
f"k={params.k:.6g}, skin={params.skin:.6g}, wellboreC={params.wellboreC:.6g}, "
@ -472,7 +474,7 @@ def plot_comparison(
summary = {"overall": overall, "autofit": autofit, "parts": {}}
# 分别对 pressure / derivative / slope 三部分绘图
# 分别对各曲线通道绘图。
for row, name in enumerate(part_names):
# 左列画曲线重合程度,右列画逐时间点残差,便于定位误差集中在哪个时期。
y_true = true_parts[name]
@ -533,7 +535,9 @@ def main() -> None:
print("Running solver...")
# 运行真实数值求解器
# 得到 ground truth 曲线
curve_solver, raw_solver = run_solver_and_extract_curve(cfg, params, args.well_index)
curve_solver, raw_solver = run_solver_and_extract_curve(
cfg, params, args.well_index, int(processed["meta"]["curve_dim"])
)
print("Running surrogate...")
model, use_schedule, device = load_model(model_path)

@ -119,7 +119,7 @@ def calc_metrics(
def split_curve_by_layout(curve: np.ndarray, layout: dict) -> dict[str, np.ndarray]:
"""按照 curve_layout 将拼接曲线拆成 log_pressure、log_derivative 和 slope 三段"""
"""按照 curve_layout 拆分曲线"""
parts: dict[str, np.ndarray] = {}
for part in layout["parts"]:
start = int(part["start"])
@ -225,7 +225,6 @@ def build_composite_score(overall_m: dict, part_ms: dict) -> float:
+ 0.5 * overall_m["mae"]
+ 0.8 * part_ms["log_pressure"]["abs_bias"]
+ 0.8 * part_ms["log_derivative"]["rmse"]
+ 0.2 * part_ms["slope"]["rmse"]
)
@ -245,7 +244,8 @@ def plot_sample(
nrmse_text = "nan" if np.isnan(overall["nrmse"]) else f"{overall['nrmse']:.4f}"
r2_text = "nan" if np.isnan(overall["r2"]) else f"{overall['r2']:.4f}"
fig, axes = plt.subplots(3, 2, figsize=(14, 12))
plot_order = [str(part["name"]) for part in curve_layout["parts"]]
fig, axes = plt.subplots(len(plot_order), 2, figsize=(14, 4 * len(plot_order)))
fig.suptitle(
f"{title_prefix} | Sample #{idx} | "
f"Overall RMSE={overall['rmse']:.4f}, MAE={overall['mae']:.4f}, "
@ -254,11 +254,10 @@ def plot_sample(
f"R2={r2_text}"
)
plot_order = ["log_pressure", "log_derivative", "slope"]
title_map = {
"log_pressure": "Log Pressure",
"log_derivative": "Log |Derivative|",
"slope": "Slope of Log Pressure vs Log Time",
"slope": "Legacy Slope",
}
for row, name in enumerate(plot_order):
@ -275,7 +274,7 @@ def plot_sample(
ax_l.plot(x, y_true, label="True", linewidth=2, alpha=0.85)
ax_l.plot(x, y_pred, label="Predicted", linewidth=2, alpha=0.85)
ax_l.set_title(
f"{title_map[name]} | RMSE={m['rmse']:.4f}, MAE={m['mae']:.4f}, "
f"{title_map.get(name, name)} | RMSE={m['rmse']:.4f}, MAE={m['mae']:.4f}, "
f"Bias={m['bias']:.4f}, NRMSE={nrmse_text}, R2={r2_text}"
)
ax_l.set_xlabel("Resampled Time Index")
@ -286,7 +285,7 @@ def plot_sample(
ax_r = axes[row, 1]
ax_r.plot(x, err, linewidth=1.5, alpha=0.85)
ax_r.axhline(0.0, linestyle="--", linewidth=1)
ax_r.set_title(f"{title_map[name]} Error")
ax_r.set_title(f"{title_map.get(name, name)} Error")
ax_r.set_xlabel("Resampled Time Index")
ax_r.set_ylabel("Pred - True")
ax_r.grid(True, alpha=0.3)
@ -311,18 +310,19 @@ def save_sample_metrics_csv(sample_scores: list[dict], path: Path) -> None:
def infer_curve_layout(meta: dict, curve_dim: int) -> dict:
"""从元数据读取曲线分段布局;旧数据没有布局时按压力/导数/斜率三等分回退"""
"""从元数据读取布局;缺失时根据 320/480 等常用维度兼容推断"""
curve_layout = meta.get("curve_layout")
if curve_layout is not None:
return curve_layout
n_time_points = curve_dim // 3
n_parts = 3 if curve_dim % 3 == 0 else 2
n_time_points = curve_dim // n_parts
names = ["log_pressure", "log_derivative"] + (["slope"] if n_parts == 3 else [])
return {
"n_time_points": int(n_time_points),
"parts": [
{"name": "log_pressure", "start": 0, "end": n_time_points},
{"name": "log_derivative", "start": n_time_points, "end": 2 * n_time_points},
{"name": "slope", "start": 2 * n_time_points, "end": 3 * n_time_points},
{"name": name, "start": i * n_time_points, "end": (i + 1) * n_time_points}
for i, name in enumerate(names)
],
}
@ -470,15 +470,12 @@ def main() -> None:
print("Inference complete.")
overall_metric_list: list[dict] = []
part_metric_lists = {
"log_pressure": [],
"log_derivative": [],
"slope": [],
}
part_names = [str(part["name"]) for part in curve_layout["parts"]]
part_metric_lists = {name: [] for name in part_names}
sample_scores: list[dict] = []
for idx, (curve_true, curve_pred) in enumerate(zip(all_true, all_pred)):
# 同时记录整体指标和三段指标,便于判断误差来自压力、导数还是辅助 slope
# 同时记录整体指标和各曲线通道指标
overall_m = calc_metrics(curve_true, curve_pred)
overall_metric_list.append(overall_m)
@ -486,7 +483,7 @@ def main() -> None:
pred_parts = split_curve_by_layout(curve_pred, curve_layout)
part_ms: dict[str, dict] = {}
for name in ["log_pressure", "log_derivative", "slope"]:
for name in part_names:
part_m = calc_metrics(true_parts[name], pred_parts[name])
part_metric_lists[name].append(part_m)
part_ms[name] = part_m
@ -513,20 +510,12 @@ def main() -> None:
"log_derivative_abs_bias": part_ms["log_derivative"]["abs_bias"],
"log_derivative_nrmse": part_ms["log_derivative"]["nrmse"],
"log_derivative_r2": part_ms["log_derivative"]["r2"],
"slope_rmse": part_ms["slope"]["rmse"],
"slope_mae": part_ms["slope"]["mae"],
"slope_bias": part_ms["slope"]["bias"],
"slope_abs_bias": part_ms["slope"]["abs_bias"],
"slope_nrmse": part_ms["slope"]["nrmse"],
"slope_r2": part_ms["slope"]["r2"],
"overall_valid_nrmse": overall_m["valid_nrmse"],
"overall_valid_r2": overall_m["valid_r2"],
"log_pressure_valid_nrmse": part_ms["log_pressure"]["valid_nrmse"],
"log_pressure_valid_r2": part_ms["log_pressure"]["valid_r2"],
"log_derivative_valid_nrmse": part_ms["log_derivative"]["valid_nrmse"],
"log_derivative_valid_r2": part_ms["log_derivative"]["valid_r2"],
"slope_valid_nrmse": part_ms["slope"]["valid_nrmse"],
"slope_valid_r2": part_ms["slope"]["valid_r2"],
}
)
@ -538,7 +527,6 @@ def main() -> None:
"overall": summarize_metric_dicts(overall_metric_list, "overall"),
"log_pressure": summarize_metric_dicts(part_metric_lists["log_pressure"], "log_pressure"),
"log_derivative": summarize_metric_dicts(part_metric_lists["log_derivative"], "log_derivative"),
"slope": summarize_metric_dicts(part_metric_lists["slope"], "slope"),
"checkpoint": {
"model_path": str(model_path),
"processed_path": str(processed_path),
@ -559,7 +547,9 @@ def main() -> None:
n_worst = min(args.n_worst_plots, len(sample_scores))
best_indices = np.argsort(score_composite)[:n_best].tolist()
worst_indices = np.argsort(score_composite)[-n_worst:].tolist()
worst_indices = (
np.argsort(score_composite)[-n_worst:].tolist() if n_worst > 0 else []
)
random_indices = random.sample(range(len(sample_scores)), n_random)
# 三类图各有用途best 看上限worst 定位失败模式random 检查整体观感。

@ -1,419 +0,0 @@
"""评估正演代理模型集成并估计预测不确定性。
脚本按多个随机种子加载同结构模型计算集成均值预测成员间标准差和逐样本误差
导出不确定性-误差相关性统计散点图和高不确定性样本用于判断集成方差是否可作为
自动拟合候选筛选或风险提示信号
"""
# pylint: disable=import-error,wrong-import-position
# pylint: disable=too-many-locals,too-many-arguments,too-many-positional-arguments
# pylint: disable=too-many-statements
from __future__ import annotations
import argparse
import csv
import json
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
sys.path.append(str(ROOT))
import joblib
import matplotlib.pyplot as plt
import numpy as np
import torch
from src.common.experiment_paths import normalize_tag, processed_path_for_tag
from src.models.forward_surrogate import ForwardSurrogate
def parse_seed_list(seed_text: str) -> list[int]:
"""解析逗号分隔的随机种子列表,用于训练或评估模型集成。"""
seeds = []
for item in str(seed_text).split(","):
item = item.strip()
if not item:
continue
seeds.append(int(item))
if not seeds:
raise ValueError("至少需要一个 seed")
return seeds
def default_model_root(tag: str | None, use_schedule: bool) -> Path:
"""根据实验标签推导集成模型成员所在的根目录。"""
suffix = "" if use_schedule else "_no_schedule"
if tag:
return Path("models") / f"forward_surrogate_{tag}_ensemble{suffix}"
return Path("models") / f"forward_surrogate_ensemble{suffix}"
def default_output_dir(tag: str | None, use_schedule: bool) -> Path:
"""根据实验标签生成当前分析脚本默认的输出目录。"""
suffix = "" if use_schedule else "_no_schedule"
if tag:
return Path("results") / f"evaluation_{tag}_ensemble_uq{suffix}"
return Path("results") / f"evaluation_ensemble_uq{suffix}"
def parse_args() -> argparse.Namespace:
"""解析深度集成评估所需的数据、成员模型清单、样本数量和绘图数量。"""
parser = argparse.ArgumentParser(description="Evaluate deep-ensemble UQ for forward surrogate")
parser.add_argument("--processed", type=str, default=None, help="Processed dataset path")
parser.add_argument("--tag", type=str, default=None, help="Experiment tag")
parser.add_argument(
"--model-root",
type=str,
default=None,
help="Root dir that contains seed_* members",
)
parser.add_argument("--output-dir", type=str, default=None, help="Evaluation output dir")
parser.add_argument("--seeds", type=str, default="41,42,43", help="Comma-separated seed list")
parser.add_argument("--no-schedule", action="store_true")
parser.add_argument("--n-top-uncertain-plots", type=int, default=5)
return parser.parse_args()
def calc_metrics(
y_true: np.ndarray,
y_pred: np.ndarray,
eps_range: float = 1e-3,
eps_var: float = 1e-6,
) -> dict:
"""计算 RMSE、MAE、Bias、NRMSE、R2 等回归指标。"""
err = y_pred - y_true
mse = np.mean(err**2)
rmse = float(np.sqrt(mse))
mae = float(np.mean(np.abs(err)))
bias = float(np.mean(err))
value_range = float(np.max(y_true) - np.min(y_true))
ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
ss_res = float(np.sum(err**2))
valid_nrmse = value_range > eps_range
valid_r2 = ss_tot > eps_var
nrmse = float(rmse / value_range) if valid_nrmse else np.nan
r2 = float(1.0 - ss_res / ss_tot) if valid_r2 else np.nan
return {
"rmse": rmse,
"mae": mae,
"bias": bias,
"abs_bias": float(abs(bias)),
"nrmse": nrmse,
"r2": r2,
"valid_nrmse": bool(valid_nrmse),
"valid_r2": bool(valid_r2),
}
def infer_curve_layout(meta: dict, curve_dim: int) -> dict:
"""从元数据读取曲线分段布局;旧数据没有布局时按压力/导数/斜率三等分回退。"""
curve_layout = meta.get("curve_layout")
if curve_layout is not None:
return curve_layout
n_time_points = curve_dim // 3
return {
"n_time_points": int(n_time_points),
"parts": [
{"name": "log_pressure", "start": 0, "end": n_time_points},
{"name": "log_derivative", "start": n_time_points, "end": 2 * n_time_points},
{"name": "slope", "start": 2 * n_time_points, "end": 3 * n_time_points},
],
}
def split_curve_by_layout(curve: np.ndarray, layout: dict) -> dict[str, np.ndarray]:
"""按照 curve_layout 将拼接曲线拆成 log_pressure、log_derivative 和 slope 三段。"""
parts: dict[str, np.ndarray] = {}
for part in layout["parts"]:
start = int(part["start"])
end = int(part["end"])
parts[str(part["name"])] = curve[start:end]
return parts
def load_member(checkpoint_path: Path, device: torch.device) -> tuple[ForwardSurrogate, dict]:
"""加载集成中的一个模型成员、对应归一化器和模型配置。"""
checkpoint = torch.load(checkpoint_path, map_location="cpu")
model = ForwardSurrogate(
param_dim=int(checkpoint["param_dim"]),
schedule_dim=int(checkpoint["schedule_dim"]),
curve_dim=int(checkpoint["curve_dim"]),
hidden_dim=int(checkpoint["hidden_dim"]),
dropout=float(checkpoint["dropout"]),
use_schedule=bool(checkpoint.get("use_schedule", True)),
).to(device)
model.load_state_dict(checkpoint["model_state_dict"])
model.eval()
return model, checkpoint
def safe_mean(x: np.ndarray) -> float:
"""计算忽略 NaN 后的均值;没有有效数据时返回 NaN。"""
return float(np.mean(x)) if x.size else np.nan
def safe_median(x: np.ndarray) -> float:
"""计算忽略 NaN 后的中位数;没有有效数据时返回 NaN。"""
return float(np.median(x)) if x.size else np.nan
def safe_percentile(x: np.ndarray, q: float) -> float:
"""计算忽略 NaN 后的百分位数;没有有效数据时返回 NaN。"""
return float(np.percentile(x, q)) if x.size else np.nan
def pearson_corr(x: np.ndarray, y: np.ndarray) -> float:
"""计算 Pearson 相关系数;样本数不足或方差为零时返回 NaN。"""
if x.size == 0 or y.size == 0:
return np.nan
if np.allclose(np.std(x), 0.0) or np.allclose(np.std(y), 0.0):
return np.nan
return float(np.corrcoef(x, y)[0, 1])
def plot_uncertainty_scatter(sample_rows: list[dict], output_path: Path) -> None:
"""绘制预测不确定性与真实误差的散点图,检查二者是否相关。"""
rmse = np.array([row["overall_rmse"] for row in sample_rows], dtype=np.float64)
unc = np.array([row["unc_mean_std"] for row in sample_rows], dtype=np.float64)
plt.figure(figsize=(7, 5))
plt.scatter(unc, rmse, s=10, alpha=0.35)
plt.xlabel("Predictive Uncertainty (mean std)")
plt.ylabel("Overall RMSE")
plt.title(f"Uncertainty vs Error | Pearson={pearson_corr(unc, rmse):.4f}")
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig(output_path, dpi=150, bbox_inches="tight")
plt.close()
def plot_uncertain_sample(
idx: int,
curve_true: np.ndarray,
curve_mean: np.ndarray,
curve_std: np.ndarray,
curve_layout: dict,
output_dir: Path,
unc_score: float,
rmse: float,
) -> None:
"""绘制高不确定性样本的真实曲线、集成均值和成员分布。"""
true_parts = split_curve_by_layout(curve_true, curve_layout)
mean_parts = split_curve_by_layout(curve_mean, curve_layout)
std_parts = split_curve_by_layout(curve_std, curve_layout)
title_map = {
"log_pressure": "Log Pressure",
"log_derivative": "Log |Derivative|",
"slope": "Slope of Log Pressure vs Log Time",
}
fig, axes = plt.subplots(3, 1, figsize=(12, 10))
fig.suptitle(
f"High-Uncertainty Sample #{idx} | unc_mean_std={unc_score:.4f}, overall_rmse={rmse:.4f}"
)
for ax, name in zip(axes, ["log_pressure", "log_derivative", "slope"]):
y_true = true_parts[name]
y_mean = mean_parts[name]
y_std = std_parts[name]
x = np.arange(len(y_true))
ax.plot(x, y_true, label="True", linewidth=2)
ax.plot(x, y_mean, label="Ensemble mean", linewidth=2)
ax.fill_between(
x,
y_mean - 2.0 * y_std,
y_mean + 2.0 * y_std,
alpha=0.2,
label="mean ± 2 std",
)
ax.set_title(title_map[name])
ax.grid(True, alpha=0.3)
ax.legend()
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.savefig(output_dir / f"top_uncertain_sample_{idx:04d}.png", dpi=150, bbox_inches="tight")
plt.close()
def main() -> None:
"""汇总多个代理模型成员的均值和方差,用于测试集误差与不确定性评估。"""
args = parse_args()
tag = normalize_tag(args.tag)
use_schedule = not args.no_schedule
seeds = parse_seed_list(args.seeds)
processed_path = (
Path(args.processed)
if args.processed is not None
else processed_path_for_tag(tag)
)
model_root = (
Path(args.model_root)
if args.model_root is not None
else default_model_root(tag, use_schedule)
)
output_dir = (
Path(args.output_dir)
if args.output_dir is not None
else default_output_dir(tag, use_schedule)
)
output_dir.mkdir(parents=True, exist_ok=True)
# 集成评估必须使用同一份 processed 数据,保证各成员输入标准化口径一致。
data = joblib.load(processed_path)
x_params_test = data["X_params_test"]
x_schedule_test = data["X_schedule_test"]
y_curve_test = data["Y_curve_test"]
scaler_curve = data["scaler_curve"]
curve_layout = infer_curve_layout(data["meta"], int(data["meta"]["curve_dim"]))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
members: list[tuple[int, ForwardSurrogate]] = []
member_paths = []
first_use_schedule = None
for seed in seeds:
ckpt_path = model_root / f"seed_{seed}" / "forward_surrogate_best.pt"
model, checkpoint = load_member(ckpt_path, device)
member_paths.append(str(ckpt_path))
members.append((seed, model))
cur_use_schedule = bool(checkpoint.get("use_schedule", True))
if first_use_schedule is None:
first_use_schedule = cur_use_schedule
elif first_use_schedule != cur_use_schedule:
# 混用带/不带 schedule 的模型会导致输入含义不同,不能直接求均值和方差。
raise RuntimeError("Ensemble 成员的 use_schedule 设置不一致")
all_true = []
all_mean = []
all_std = []
sample_rows = []
with torch.no_grad():
for idx in range(len(x_params_test)):
params_t = torch.tensor(
x_params_test[idx : idx + 1],
dtype=torch.float32,
device=device,
)
schedule_t = torch.tensor(
x_schedule_test[idx : idx + 1],
dtype=torch.float32,
device=device,
)
member_preds = []
for _, model in members:
# 每个成员独立预测后先反标准化;集成均值和标准差都在原始曲线尺度上计算。
if first_use_schedule:
pred_scaled = model(params_t, schedule_t).cpu().numpy()
else:
pred_scaled = model(params_t, None).cpu().numpy()
pred = scaler_curve.inverse_transform(pred_scaled)[0].astype(np.float32)
member_preds.append(pred)
member_preds = np.stack(member_preds, axis=0)
curve_true = scaler_curve.inverse_transform(
y_curve_test[idx : idx + 1]
)[0].astype(np.float32)
curve_mean = member_preds.mean(axis=0).astype(np.float32)
curve_std = member_preds.std(axis=0, ddof=0).astype(np.float32)
metrics = calc_metrics(curve_true, curve_mean)
parts_std = split_curve_by_layout(curve_std, curve_layout)
# std 作为经验不确定性指标,后续会和真实 RMSE 做相关性分析。
sample_rows.append(
{
"idx": idx,
"overall_rmse": metrics["rmse"],
"overall_mae": metrics["mae"],
"overall_bias": metrics["bias"],
"overall_r2": metrics["r2"],
"unc_mean_std": float(np.mean(curve_std)),
"unc_max_std": float(np.max(curve_std)),
"unc_log_pressure_mean_std": float(np.mean(parts_std["log_pressure"])),
"unc_log_derivative_mean_std": float(np.mean(parts_std["log_derivative"])),
"unc_slope_mean_std": float(np.mean(parts_std["slope"])),
}
)
all_true.append(curve_true)
all_mean.append(curve_mean)
all_std.append(curve_std)
all_true = np.stack(all_true, axis=0)
all_mean = np.stack(all_mean, axis=0)
all_std = np.stack(all_std, axis=0)
overall_metrics = [calc_metrics(t, p) for t, p in zip(all_true, all_mean)]
rmse = np.array([m["rmse"] for m in overall_metrics], dtype=np.float64)
mae = np.array([m["mae"] for m in overall_metrics], dtype=np.float64)
r2_valid = np.array([m["r2"] for m in overall_metrics if m["valid_r2"]], dtype=np.float64)
unc = np.array([row["unc_mean_std"] for row in sample_rows], dtype=np.float64)
# 聚合输出分为预测质量和不确定性质量两部分,方便单独比较 ensemble 是否有价值。
summary = {
"ensemble": {
"member_count": len(members),
"member_paths": member_paths,
"use_schedule": bool(first_use_schedule),
"processed_path": str(processed_path),
},
"prediction": {
"rmse_mean": safe_mean(rmse),
"rmse_median": safe_median(rmse),
"rmse_p90": safe_percentile(rmse, 90),
"mae_mean": safe_mean(mae),
"mae_median": safe_median(mae),
"r2_mean_valid": safe_mean(r2_valid),
"r2_median_valid": safe_median(r2_valid),
},
"uncertainty": {
"unc_mean": safe_mean(unc),
"unc_median": safe_median(unc),
"unc_p90": safe_percentile(unc, 90),
"unc_vs_rmse_pearson": pearson_corr(unc, rmse),
},
}
with open(output_dir / "ensemble_uq_summary.json", "w", encoding="utf-8") as f:
json.dump(summary, f, ensure_ascii=False, indent=2)
with open(
output_dir / "sample_uncertainty_metrics.csv",
"w",
newline="",
encoding="utf-8-sig",
) as f:
writer = csv.DictWriter(f, fieldnames=list(sample_rows[0].keys()))
writer.writeheader()
writer.writerows(sample_rows)
plot_uncertainty_scatter(sample_rows, output_dir / "uncertainty_vs_error.png")
top_k = min(args.n_top_uncertain_plots, len(sample_rows))
top_uncertain = sorted(sample_rows, key=lambda row: row["unc_mean_std"], reverse=True)[:top_k]
# 只绘制最高不确定性的样本,重点检查模型成员分歧最大的区域。
for row in top_uncertain:
idx = int(row["idx"])
plot_uncertain_sample(
idx=idx,
curve_true=all_true[idx],
curve_mean=all_mean[idx],
curve_std=all_std[idx],
curve_layout=curve_layout,
output_dir=output_dir,
unc_score=float(row["unc_mean_std"]),
rmse=float(row["overall_rmse"]),
)
print("Ensemble UQ evaluation complete.")
print(f"Output dir: {output_dir}")
print(f"RMSE mean={summary['prediction']['rmse_mean']:.6f}")
print(f"UQ-RMSE Pearson={summary['uncertainty']['unc_vs_rmse_pearson']:.6f}")
if __name__ == "__main__":
main()

@ -1,684 +0,0 @@
"""评估时间条件正演代理模型。
时间条件模型以样本参数 + 单个时间点为输入预测压力/导数因此本脚本会把整条曲线
展开为点级批量推理再还原为样本级误差统计域内/域外分组摘要和最差案例图
该评估用于检查模型在可变时间采样与 PSO 参数域上的泛化能力
"""
from __future__ import annotations
# pylint: disable=import-error,wrong-import-position,line-too-long
# pylint: disable=too-many-arguments,too-many-positional-arguments
# pylint: disable=too-many-locals,too-many-statements
# pylint: disable=invalid-name,unused-argument,too-many-function-args
import argparse
import csv
import json
import random
import sys
from pathlib import Path
from typing import Iterable
import joblib
import matplotlib.pyplot as plt
import numpy as np
import torch
ROOT = Path(__file__).resolve().parents[1]
sys.path.append(str(ROOT))
from src.common.experiment_paths import normalize_tag, processed_path_for_tag
from src.data.param_features import inverse_transform_param_features
from src.models.time_conditioned_surrogate import TimeConditionedSurrogate
from src.training.train_forward import get_part_slices, infer_curve_layout
DEFAULT_RANDOM_SEED = 42
DEFAULT_PSO_DOMAIN = {
"k_min": 0.001,
"k_max": 10.0,
"skin_min": -10.0,
"skin_max": 10.0,
"wellboreC_min": 1.0e-4,
"wellboreC_max": 2.0,
"phi_min": 0.01,
"phi_max": 0.5,
"h_min": 2.0,
"h_max": 50.0,
}
def parse_args() -> argparse.Namespace:
"""解析时间条件代理模型评估所需的数据、checkpoint、样本数和输出目录。"""
parser = argparse.ArgumentParser(description="Evaluate a time-conditioned point-wise surrogate")
parser.add_argument("--processed", type=str, default=None, help="Processed dataset path")
parser.add_argument("--tag", type=str, default=None, help="Experiment tag for auto naming")
parser.add_argument("--model", type=str, default=None, help="Model checkpoint path")
parser.add_argument("--output-dir", type=str, default=None, help="Optional evaluation output directory")
parser.add_argument("--batch-size", type=int, default=65536, help="Point batch size for inference")
parser.add_argument("--device", type=str, default=None, help="Override device, e.g. cpu or cuda")
parser.add_argument("--seed", type=int, default=DEFAULT_RANDOM_SEED)
parser.add_argument("--n-random-plots", type=int, default=5)
parser.add_argument("--n-best-plots", type=int, default=5)
parser.add_argument("--n-worst-plots", type=int, default=10)
parser.add_argument("--top-k-analysis", type=int, default=300)
parser.add_argument("--pso-k-min", type=float, default=DEFAULT_PSO_DOMAIN["k_min"])
parser.add_argument("--pso-k-max", type=float, default=DEFAULT_PSO_DOMAIN["k_max"])
parser.add_argument("--pso-h-min", type=float, default=DEFAULT_PSO_DOMAIN["h_min"])
parser.add_argument("--pso-h-max", type=float, default=DEFAULT_PSO_DOMAIN["h_max"])
parser.add_argument("--pso-skin-min", type=float, default=DEFAULT_PSO_DOMAIN["skin_min"])
parser.add_argument("--pso-skin-max", type=float, default=DEFAULT_PSO_DOMAIN["skin_max"])
parser.add_argument("--pso-wellboreC-min", type=float, default=DEFAULT_PSO_DOMAIN["wellboreC_min"])
parser.add_argument("--pso-wellboreC-max", type=float, default=DEFAULT_PSO_DOMAIN["wellboreC_max"])
parser.add_argument("--pso-phi-min", type=float, default=DEFAULT_PSO_DOMAIN["phi_min"])
parser.add_argument("--pso-phi-max", type=float, default=DEFAULT_PSO_DOMAIN["phi_max"])
return parser.parse_args()
def default_model_path(tag: str | None) -> Path:
"""根据实验标签和是否使用流量制度输入推导默认模型检查点路径。"""
if tag:
return Path("models") / f"time_conditioned_surrogate_{tag}" / "time_conditioned_surrogate_best.pt"
return Path("models/time_conditioned_surrogate/time_conditioned_surrogate_best.pt")
def default_output_dir(tag: str | None) -> Path:
"""根据实验标签生成当前分析脚本默认的输出目录。"""
if tag:
return Path("results") / f"evaluation_time_conditioned_{tag}"
return Path("results/evaluation_time_conditioned")
def percentile_summary(values: np.ndarray) -> dict:
"""计算数组的均值、中位数和若干百分位数,用于误差分布汇总。"""
x = np.asarray(values, dtype=np.float64).reshape(-1)
if x.size == 0:
return {
"min": None,
"p05": None,
"p25": None,
"median": None,
"p75": None,
"p90": None,
"p95": None,
"max": None,
}
return {
"min": float(np.min(x)),
"p05": float(np.percentile(x, 5)),
"p25": float(np.percentile(x, 25)),
"median": float(np.percentile(x, 50)),
"p75": float(np.percentile(x, 75)),
"p90": float(np.percentile(x, 90)),
"p95": float(np.percentile(x, 95)),
"max": float(np.max(x)),
}
def point_metrics(true: np.ndarray, pred: np.ndarray) -> dict:
"""计算逐时间点预测的误差指标。"""
err = np.asarray(pred, dtype=np.float64) - np.asarray(true, dtype=np.float64)
abs_err = np.abs(err)
return {
"rmse": float(np.sqrt(np.mean(err**2))),
"mae": float(np.mean(abs_err)),
"bias": float(np.mean(err)),
"p90_abs": float(np.percentile(abs_err, 90)),
"p95_abs": float(np.percentile(abs_err, 95)),
}
def sample_metrics(true_p: np.ndarray, pred_p: np.ndarray, true_d: np.ndarray, pred_d: np.ndarray) -> list[dict]:
"""计算单条曲线样本的整体误差指标。"""
rows: list[dict] = []
for idx in range(true_p.shape[0]):
p_err = pred_p[idx] - true_p[idx]
d_err = pred_d[idx] - true_d[idx]
rmse_p = float(np.sqrt(np.mean(p_err**2)))
rmse_d = float(np.sqrt(np.mean(d_err**2)))
mae_p = float(np.mean(np.abs(p_err)))
mae_d = float(np.mean(np.abs(d_err)))
rows.append(
{
"idx": idx,
"rmse_p": rmse_p,
"rmse_d": rmse_d,
"mae_p": mae_p,
"mae_d": mae_d,
"score": float(rmse_p + 2.0 * rmse_d),
}
)
return rows
def write_csv(path: Path, rows: list[dict], fieldnames: list[str] | None = None) -> None:
"""按字段名写出 CSV 明细或汇总结果。"""
path.parent.mkdir(parents=True, exist_ok=True)
if not rows:
path.write_text("", encoding="utf-8-sig")
return
names = fieldnames or list(rows[0].keys())
with path.open("w", newline="", encoding="utf-8-sig") as f:
writer = csv.DictWriter(f, fieldnames=names, extrasaction="ignore")
writer.writeheader()
writer.writerows(rows)
def iter_batches(total: int, batch_size: int) -> Iterable[tuple[int, int]]:
"""把样本数量切分为批量索引范围,避免评估时一次性占用过多内存。"""
batch = max(1, int(batch_size))
for start in range(0, int(total), batch):
yield start, min(start + batch, int(total))
def load_model(model_path: Path, device: torch.device) -> tuple[TimeConditionedSurrogate, dict]:
"""加载模型检查点,按保存的维度和超参数重建网络并切换到评估模式。"""
checkpoint = torch.load(model_path, map_location="cpu")
model = TimeConditionedSurrogate(
param_dim=int(checkpoint["param_dim"]),
schedule_dim=int(checkpoint["schedule_dim"]),
time_dim=int(checkpoint["time_dim"]),
hidden_dim=int(checkpoint["hidden_dim"]),
n_blocks=int(checkpoint["n_blocks"]),
dropout=float(checkpoint["dropout"]),
use_schedule=bool(checkpoint.get("use_schedule", True)),
)
model.load_state_dict(checkpoint["model_state_dict"])
model.eval()
model.to(device)
return model, checkpoint
def predict_scaled_points(
model: TimeConditionedSurrogate,
params_x: np.ndarray,
schedule_x: np.ndarray,
time_x: np.ndarray,
device: torch.device,
batch_size: int,
) -> np.ndarray:
"""在标准化空间中批量预测时间条件模型的逐点输出。"""
n_samples, n_time, time_dim = time_x.shape
params_flat = np.repeat(params_x, n_time, axis=0)
schedule_flat = np.repeat(schedule_x, n_time, axis=0)
time_flat = time_x.reshape(n_samples * n_time, time_dim)
pred_flat = np.empty((n_samples * n_time, 2), dtype=np.float32)
use_schedule = bool(model.use_schedule)
with torch.no_grad():
for start, end in iter_batches(len(time_flat), batch_size):
params_t = torch.tensor(params_flat[start:end], dtype=torch.float32, device=device)
time_t = torch.tensor(time_flat[start:end], dtype=torch.float32, device=device)
if use_schedule:
schedule_t = torch.tensor(schedule_flat[start:end], dtype=torch.float32, device=device)
else:
schedule_t = None
pred_flat[start:end] = model(params_t, time_t, schedule_t).detach().cpu().numpy()
return pred_flat.reshape(n_samples, n_time, 2)
def inverse_curve_part(values_scaled: np.ndarray, scaler_curve: object, part_slice: slice) -> np.ndarray:
"""使用曲线 scaler 的对应分段参数把预测值恢复到原始尺度。"""
mean = np.asarray(scaler_curve.mean_[part_slice], dtype=np.float32)
scale = np.asarray(scaler_curve.scale_[part_slice], dtype=np.float32)
return values_scaled.astype(np.float32) * scale.reshape(1, -1) + mean.reshape(1, -1)
def recover_raw_params(data: dict) -> dict[str, np.ndarray]:
"""把标准化后的参数特征反变换回原始物理参数。"""
meta = data.get("meta", {}) or {}
features = data["scaler_params"].inverse_transform(data["X_params_test"])
raw = inverse_transform_param_features(features, meta.get("param_feature_transform"))
names = list(meta.get("param_names") or ["k", "skin", "wellboreC", "phi", "h", "Cf"])
return {name: raw[:, idx].astype(np.float64) for idx, name in enumerate(names[: raw.shape[1]])}
def build_pso_mask(params: dict[str, np.ndarray], args: argparse.Namespace) -> np.ndarray:
"""根据元数据标记出 PSO/自动拟合相关样本,用于单独统计。"""
return (
(params["k"] >= float(args.pso_k_min))
& (params["k"] <= float(args.pso_k_max))
& (params["skin"] >= float(args.pso_skin_min))
& (params["skin"] <= float(args.pso_skin_max))
& (params["wellboreC"] >= float(args.pso_wellboreC_min))
& (params["wellboreC"] <= float(args.pso_wellboreC_max))
& (params["phi"] >= float(args.pso_phi_min))
& (params["phi"] <= float(args.pso_phi_max))
& (params["h"] >= float(args.pso_h_min))
& (params["h"] <= float(args.pso_h_max))
)
def summarize_group(score: np.ndarray, rmse_p: np.ndarray, rmse_d: np.ndarray, mask: np.ndarray) -> dict:
"""对一个样本分组计算误差、不确定性和样本数量等汇总指标。"""
m = np.asarray(mask, dtype=bool)
return {
"n": int(np.sum(m)),
"score": percentile_summary(score[m]),
"rmse_p": percentile_summary(rmse_p[m]),
"rmse_d": percentile_summary(rmse_d[m]),
"score_gt_1_ratio": float(np.mean(score[m] > 1.0)) if np.any(m) else None,
"score_gt_2_ratio": float(np.mean(score[m] > 2.0)) if np.any(m) else None,
"score_gt_5_ratio": float(np.mean(score[m] > 5.0)) if np.any(m) else None,
}
def build_domain_summary(sample_rows: list[dict], params: dict[str, np.ndarray], pso_mask: np.ndarray) -> dict:
"""按数据域或流量制度类别构建评估摘要。"""
score = np.asarray([r["score"] for r in sample_rows], dtype=np.float64)
rmse_p = np.asarray([r["rmse_p"] for r in sample_rows], dtype=np.float64)
rmse_d = np.asarray([r["rmse_d"] for r in sample_rows], dtype=np.float64)
skin = params["skin"]
wellboreC = params["wellboreC"]
order = np.argsort(-score)
top100 = order[: min(100, order.size)]
return {
"all": summarize_group(score, rmse_p, rmse_d, np.ones_like(pso_mask, dtype=bool)),
"pso_domain": summarize_group(score, rmse_p, rmse_d, pso_mask),
"outside_pso_domain": summarize_group(score, rmse_p, rmse_d, ~pso_mask),
"pso_skin_lt_minus_5": summarize_group(score, rmse_p, rmse_d, pso_mask & (skin < -5.0)),
"pso_skin_lt_minus_8": summarize_group(score, rmse_p, rmse_d, pso_mask & (skin < -8.0)),
"pso_skin_lt_minus_5_wellboreC_gt_0_1": summarize_group(
score,
rmse_p,
rmse_d,
pso_mask & (skin < -5.0) & (wellboreC > 0.1),
),
"top100": {
"outside_pso_domain": int(np.sum(~pso_mask[top100])),
"k_lt_0_001": int(np.sum(params["k"][top100] < 0.001)),
"k_gt_10": int(np.sum(params["k"][top100] > 10.0)),
"h_gt_50": int(np.sum(params["h"][top100] > 50.0)),
"pso_skin_lt_minus_5_wellboreC_gt_0_1": int(
np.sum((pso_mask & (skin < -5.0) & (wellboreC > 0.1))[top100])
),
},
}
def summarize_params_for_indices(params: dict[str, np.ndarray], indices: np.ndarray) -> dict:
"""统计指定样本索引对应的物理参数范围和中心值。"""
return {
name: percentile_summary(values[np.asarray(indices, dtype=int)])
for name, values in params.items()
if name in {"k", "skin", "wellboreC", "phi", "h", "Cf"}
}
def build_worst_case_summary(
sample_rows: list[dict],
params: dict[str, np.ndarray],
pso_mask: np.ndarray,
top_k: int,
) -> dict:
"""挑选误差最大的样本并整理其指标、参数和元数据。"""
score = np.asarray([r["score"] for r in sample_rows], dtype=np.float64)
order_worst = np.argsort(-score)
order_best = np.argsort(score)
top = order_worst[: min(int(top_k), order_worst.size)]
worst100 = order_worst[: min(100, order_worst.size)]
best100 = order_best[: min(100, order_best.size)]
return {
"top_k": int(top.size),
"metrics": {
"score": percentile_summary(score),
"n_score_gt_1": int(np.sum(score > 1.0)),
"n_score_gt_2": int(np.sum(score > 2.0)),
"n_score_gt_5": int(np.sum(score > 5.0)),
},
"pso_domain": {
"n_inside": int(np.sum(pso_mask)),
"n_outside": int(np.sum(~pso_mask)),
"top100_outside": int(np.sum(~pso_mask[worst100])),
"top100_k_lt_0_001": int(np.sum(params["k"][worst100] < 0.001)),
"top100_k_gt_10": int(np.sum(params["k"][worst100] > 10.0)),
"top100_h_gt_50": int(np.sum(params["h"][worst100] > 50.0)),
},
"params": {
"all": summarize_params_for_indices(params, np.arange(score.size)),
"worst_top_k": summarize_params_for_indices(params, top),
"worst100": summarize_params_for_indices(params, worst100),
"best100": summarize_params_for_indices(params, best100),
},
}
def build_worst_case_rows(
sample_rows: list[dict],
params: dict[str, np.ndarray],
data: dict,
true_p: np.ndarray,
true_d: np.ndarray,
pred_p: np.ndarray,
pred_d: np.ndarray,
pso_mask: np.ndarray,
top_k: int,
) -> tuple[list[dict], list[dict]]:
"""生成最差样本明细行,供 CSV 输出和人工检查。"""
score = np.asarray([r["score"] for r in sample_rows], dtype=np.float64)
order = np.argsort(-score)[: min(int(top_k), len(sample_rows))]
family = data.get("family_name_test")
schedule_meta = data.get("schedule_meta_test")
schedule_meta_names = list((data.get("meta", {}) or {}).get("schedule_meta_names") or [])
case_rows: list[dict] = []
residual_rows: list[dict] = []
for rank, idx in enumerate(order, 1):
p_res = pred_p[idx] - true_p[idx]
d_res = pred_d[idx] - true_d[idx]
p_rmse = float(np.sqrt(np.mean(p_res**2)))
d_rmse = float(np.sqrt(np.mean(d_res**2)))
p_mean = float(np.mean(p_res))
d_mean = float(np.mean(d_res))
p_std = float(np.std(p_res))
d_std = float(np.std(d_res))
row = {
"rank": rank,
"idx": int(idx),
"score": float(score[idx]),
"rmse_p": float(sample_rows[idx]["rmse_p"]),
"rmse_d": float(sample_rows[idx]["rmse_d"]),
"mae_p": float(sample_rows[idx]["mae_p"]),
"mae_d": float(sample_rows[idx]["mae_d"]),
"in_pso_domain": int(bool(pso_mask[idx])),
"family": str(family[idx]) if family is not None else "",
}
for name in ["k", "skin", "wellboreC", "phi", "h", "Cf"]:
if name in params:
row[name] = float(params[name][idx])
if schedule_meta is not None:
for midx, name in enumerate(schedule_meta_names):
if midx < schedule_meta.shape[1]:
row[f"sched_{name}"] = float(schedule_meta[idx, midx])
case_rows.append(row)
residual_rows.append(
{
"rank": rank,
"idx": int(idx),
"score": float(score[idx]),
"p_res_mean": p_mean,
"p_res_std": p_std,
"p_res_rmse": p_rmse,
"p_shift_ratio": float(abs(p_mean) / max(p_rmse, 1.0e-12)),
"d_res_mean": d_mean,
"d_res_std": d_std,
"d_res_rmse": d_rmse,
"d_shift_ratio": float(abs(d_mean) / max(d_rmse, 1.0e-12)),
"p_res_first": float(p_res[0]),
"p_res_last": float(p_res[-1]),
"d_res_first": float(d_res[0]),
"d_res_last": float(d_res[-1]),
}
)
return case_rows, residual_rows
def plot_sample(
output_path: Path,
idx: int,
t: np.ndarray,
true_p: np.ndarray,
pred_p: np.ndarray,
true_d: np.ndarray,
pred_d: np.ndarray,
title: str,
) -> None:
"""绘制单个样本的真实曲线、预测曲线和误差曲线,并保存为图片。"""
x = np.asarray(t, dtype=np.float64)
fig, axes = plt.subplots(2, 2, figsize=(13, 8))
fig.suptitle(title)
axes[0, 0].plot(x, true_p, label="True", linewidth=2)
axes[0, 0].plot(x, pred_p, label="Pred", linewidth=2)
axes[0, 0].set_title("Log Pressure")
axes[0, 0].set_xscale("log")
axes[0, 0].grid(True, alpha=0.3)
axes[0, 0].legend()
axes[0, 1].plot(x, pred_p - true_p, linewidth=1.5)
axes[0, 1].axhline(0.0, linestyle="--", linewidth=1)
axes[0, 1].set_title("Pressure Residual")
axes[0, 1].set_xscale("log")
axes[0, 1].grid(True, alpha=0.3)
axes[1, 0].plot(x, true_d, label="True", linewidth=2)
axes[1, 0].plot(x, pred_d, label="Pred", linewidth=2)
axes[1, 0].set_title("Log Derivative")
axes[1, 0].set_xscale("log")
axes[1, 0].grid(True, alpha=0.3)
axes[1, 0].legend()
axes[1, 1].plot(x, pred_d - true_d, linewidth=1.5)
axes[1, 1].axhline(0.0, linestyle="--", linewidth=1)
axes[1, 1].set_title("Derivative Residual")
axes[1, 1].set_xscale("log")
axes[1, 1].grid(True, alpha=0.3)
for ax in axes.ravel():
ax.set_xlabel("Time")
output_path.parent.mkdir(parents=True, exist_ok=True)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.savefig(output_path, dpi=150, bbox_inches="tight")
plt.close(fig)
def write_plots(
output_dir: Path,
sample_rows: list[dict],
t_curve: np.ndarray,
true_p: np.ndarray,
true_d: np.ndarray,
pred_p: np.ndarray,
pred_d: np.ndarray,
args: argparse.Namespace,
) -> None:
"""批量绘制代表性样本或最差样本的曲线对比图。"""
plot_dir = output_dir / "plots"
score = np.asarray([r["score"] for r in sample_rows], dtype=np.float64)
random.seed(int(args.seed))
n_random = min(int(args.n_random_plots), len(sample_rows))
n_best = min(int(args.n_best_plots), len(sample_rows))
n_worst = min(int(args.n_worst_plots), len(sample_rows))
best = np.argsort(score)[:n_best].tolist()
worst = np.argsort(-score)[:n_worst].tolist()
random_idx = random.sample(range(len(sample_rows)), n_random)
for idx in random_idx:
plot_sample(
plot_dir / f"sample_{idx:04d}.png",
idx,
t_curve[idx],
true_p[idx],
pred_p[idx],
true_d[idx],
pred_d[idx],
f"Random sample {idx} | score={score[idx]:.4f}",
)
for idx in best:
plot_sample(
plot_dir / f"best_sample_{idx:04d}.png",
idx,
t_curve[idx],
true_p[idx],
pred_p[idx],
true_d[idx],
pred_d[idx],
f"Best sample {idx} | score={score[idx]:.4f}",
)
for idx in worst:
plot_sample(
plot_dir / f"worst_sample_{idx:04d}.png",
idx,
t_curve[idx],
true_p[idx],
pred_p[idx],
true_d[idx],
pred_d[idx],
f"Worst sample {idx} | score={score[idx]:.4f}",
)
def main() -> None:
"""评估按时间点预测的代理模型,分别统计缩放域和原始对数域误差。"""
args = parse_args()
tag = normalize_tag(args.tag)
processed_path = Path(args.processed) if args.processed is not None else processed_path_for_tag(tag)
model_path = Path(args.model) if args.model is not None else default_model_path(tag)
output_dir = Path(args.output_dir) if args.output_dir is not None else default_output_dir(tag)
output_dir.mkdir(parents=True, exist_ok=True)
device_name = args.device or ("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device(device_name)
print("Loading processed dataset...")
data = joblib.load(processed_path)
required = ["X_params_test", "X_schedule_test", "X_time_test", "Y_curve_test"]
missing = [key for key in required if key not in data]
if missing:
# 时间条件模型依赖逐点时间特征;旧版 processed 数据没有这些字段时不能评估。
raise KeyError(f"processed dataset is missing time-conditioned fields: {missing}")
print("Loading model...")
model, checkpoint = load_model(model_path, device)
curve_layout = checkpoint.get("curve_layout") or infer_curve_layout(data)
slices = get_part_slices(curve_layout)
x_params = np.asarray(data["X_params_test"], dtype=np.float32)
x_schedule = np.asarray(data["X_schedule_test"], dtype=np.float32)
x_time = np.asarray(data["X_time_test"], dtype=np.float32)
y_curve = np.asarray(data["Y_curve_test"], dtype=np.float32)
scaler_curve = data["scaler_curve"]
print(
f"test={x_params.shape[0]}, n_time={x_time.shape[1]}, "
f"param_dim={x_params.shape[1]}, schedule_dim={x_schedule.shape[1]}, time_dim={x_time.shape[-1]}"
)
print(f"device={device}, batch_size={args.batch_size}")
pred_scaled = predict_scaled_points(
model=model,
params_x=x_params,
schedule_x=x_schedule,
time_x=x_time,
device=device,
batch_size=int(args.batch_size),
)
p_slice = slices["log_pressure"]
d_slice = slices["log_derivative"]
# 时间条件模型只预测 pressure/derivative 两个通道,不直接预测辅助 slope。
true_p_scaled = y_curve[:, p_slice]
true_d_scaled = y_curve[:, d_slice]
pred_p_scaled = pred_scaled[:, :, 0]
pred_d_scaled = pred_scaled[:, :, 1]
true_p = inverse_curve_part(true_p_scaled, scaler_curve, p_slice)
true_d = inverse_curve_part(true_d_scaled, scaler_curve, d_slice)
pred_p = inverse_curve_part(pred_p_scaled, scaler_curve, p_slice)
pred_d = inverse_curve_part(pred_d_scaled, scaler_curve, d_slice)
# scaled 指标看模型训练空间内的误差raw 指标看真实物理/工程尺度下的误差。
summary = {
"processed_path": str(processed_path),
"model_path": str(model_path),
"device": str(device),
"checkpoint": {
"hidden_dim": int(checkpoint["hidden_dim"]),
"n_blocks": int(checkpoint["n_blocks"]),
"dropout": float(checkpoint["dropout"]),
"use_schedule": bool(checkpoint.get("use_schedule", True)),
},
"scaled_log_pressure": point_metrics(true_p_scaled, pred_p_scaled),
"scaled_log_derivative": point_metrics(true_d_scaled, pred_d_scaled),
"raw_log_pressure": point_metrics(true_p, pred_p),
"raw_log_derivative": point_metrics(true_d, pred_d),
}
rows = sample_metrics(true_p=true_p, pred_p=pred_p, true_d=true_d, pred_d=pred_d)
params = recover_raw_params(data)
pso_mask = build_pso_mask(params, args)
domain_summary = build_domain_summary(rows, params, pso_mask)
# PSO-domain 单独统计,用来判断模型在自动拟合常用参数范围内是否稳定。
summary["pso_domain"] = {
"bounds": {
"k": [float(args.pso_k_min), float(args.pso_k_max)],
"skin": [float(args.pso_skin_min), float(args.pso_skin_max)],
"wellboreC": [float(args.pso_wellboreC_min), float(args.pso_wellboreC_max)],
"phi": [float(args.pso_phi_min), float(args.pso_phi_max)],
"h": [float(args.pso_h_min), float(args.pso_h_max)],
},
"metrics": domain_summary,
}
case_rows, residual_rows = build_worst_case_rows(
sample_rows=rows,
params=params,
data=data,
true_p=true_p,
true_d=true_d,
pred_p=pred_p,
pred_d=pred_d,
pso_mask=pso_mask,
top_k=int(args.top_k_analysis),
)
worst_case_summary = build_worst_case_summary(
sample_rows=rows,
params=params,
pso_mask=pso_mask,
top_k=int(args.top_k_analysis),
)
# 输出分为整体指标、逐样本指标、最差案例和最差残差,便于逐层排查误差来源。
(output_dir / "summary_metrics.json").write_text(
json.dumps(summary, indent=2, ensure_ascii=False),
encoding="utf-8",
)
write_csv(output_dir / "sample_metrics.csv", rows)
write_csv(output_dir / "worst_case_analysis.csv", case_rows)
write_csv(output_dir / "worst_residual_analysis.csv", residual_rows)
(output_dir / "worst_case_summary.json").write_text(
json.dumps(worst_case_summary, indent=2, ensure_ascii=False),
encoding="utf-8",
)
residual_summary = {
# 残差 shift ratio 用于观察坏样本是否主要表现为整条曲线偏移。
"top_k": int(len(residual_rows)),
"top300_p_shift_ratio_median": float(np.median([r["p_shift_ratio"] for r in residual_rows])),
"top300_d_shift_ratio_median": float(np.median([r["d_shift_ratio"] for r in residual_rows])),
"top100_p_shift_ratio_median": float(np.median([r["p_shift_ratio"] for r in residual_rows[:100]])),
"top100_d_shift_ratio_median": float(np.median([r["d_shift_ratio"] for r in residual_rows[:100]])),
"top20": residual_rows[:20],
}
(output_dir / "worst_residual_summary.json").write_text(
json.dumps(residual_summary, indent=2, ensure_ascii=False),
encoding="utf-8",
)
t_curve = np.asarray(data.get("T_curve_test"), dtype=np.float32)
if t_curve.ndim == 2 and t_curve.shape == true_p.shape:
# 只有保存了与曲线形状一致的真实时间坐标时,才绘制时间轴曲线图。
write_plots(output_dir, rows, t_curve, true_p, true_d, pred_p, pred_d, args)
print("\nEvaluation complete.")
print(f"raw_log_pressure RMSE={summary['raw_log_pressure']['rmse']:.6f}, MAE={summary['raw_log_pressure']['mae']:.6f}")
print(f"raw_log_derivative RMSE={summary['raw_log_derivative']['rmse']:.6f}, MAE={summary['raw_log_derivative']['mae']:.6f}")
print(
"PSO-domain: "
f"n={domain_summary['pso_domain']['n']}, "
f"median={domain_summary['pso_domain']['score']['median']:.6f}, "
f"p95={domain_summary['pso_domain']['score']['p95']:.6f}, "
f"score>1={100.0 * domain_summary['pso_domain']['score_gt_1_ratio']:.3f}%"
)
print(f"Artifacts written to: {output_dir}")
if __name__ == "__main__":
main()

@ -78,17 +78,18 @@ def set_seed(seed: int) -> None:
def infer_curve_layout(meta: dict, curve_dim: int) -> dict:
"""从元数据读取曲线分段布局;旧数据没有布局时按压力/导数/斜率三等分回退"""
"""从元数据读取布局;缺失时兼容推断双通道或旧三通道布局"""
curve_layout = meta.get("curve_layout")
if curve_layout is not None:
return curve_layout
n_time_points = curve_dim // 3
n_parts = 3 if curve_dim % 3 == 0 else 2
n_time_points = curve_dim // n_parts
names = ["log_pressure", "log_derivative"] + (["slope"] if n_parts == 3 else [])
return {
"n_time_points": int(n_time_points),
"parts": [
{"name": "log_pressure", "start": 0, "end": n_time_points},
{"name": "log_derivative", "start": n_time_points, "end": 2 * n_time_points},
{"name": "slope", "start": 2 * n_time_points, "end": 3 * n_time_points},
{"name": name, "start": i * n_time_points, "end": (i + 1) * n_time_points}
for i, name in enumerate(names)
],
}

@ -32,10 +32,18 @@ import numpy as np
from src.common.config import Config
from src.common.experiment_paths import config_for_stage, normalize_tag
from src.data.curve_processing import clean_curve_for_dataset, is_valid_curve, resample_curve_to_features
from src.data.curve_processing import (
clean_curve_for_dataset,
is_valid_curve,
resample_curve_to_features_with_time,
)
from src.data.params import Params, Schedule, generate_params_dataset
from src.data.runner_client import CppRunner, read_result_bin
from src.data.schedule_encoding import encode_schedule_to_timegrid
from src.data.schedule_features import (
build_schedule_model_vector,
schedule_meta_feature_names,
use_schedule_meta_features,
)
from src.evaluation.autofit_objective import dual_log_objective
@ -269,7 +277,13 @@ def parse_args() -> argparse.Namespace:
parser.add_argument("--config", type=str, default=None)
parser.add_argument(
"--stage",
choices=["fixed_case", "case_neighborhood", "family_random", "family_random_hard"],
choices=[
"fixed_case",
"case_neighborhood",
"family_random",
"family_random_hard",
"family_random_v2_q",
],
default="family_random",
)
parser.add_argument("--output", type=str, default=None, help="Output HDF5 path")
@ -289,6 +303,12 @@ def parse_args() -> argparse.Namespace:
parser.add_argument("--max-perturbed-dims", type=int, default=3)
parser.add_argument("--balance-families", action="store_true", default=True)
parser.add_argument("--objective-bins", type=int, default=3)
parser.add_argument(
"--early-stop-valid-factor",
type=float,
default=1.25,
help="Stop an anchor when total valid neighbors reaches this multiple of neighbors-per-anchor, even if some spans are sparse",
)
parser.add_argument("--solver-timeout", type=int, default=120)
parser.add_argument("--well-index", type=int, default=0)
parser.add_argument(
@ -333,21 +353,16 @@ def resolve_output_path(cfg: Config, args: argparse.Namespace) -> Path:
return (cfg.paths.samples_dir / f"{tag}.h5").resolve()
def build_schedule_vector(cfg: Config, schedule: Schedule) -> np.ndarray:
def build_schedule_vector(
cfg: Config,
schedule: Schedule,
family_name: str = "unknown",
) -> np.ndarray:
"""把 Schedule 编码成正演代理模型可直接接收的流量制度特征向量。"""
enc = encode_schedule_to_timegrid(
return build_schedule_model_vector(
cfg,
section_index=int(schedule.sectionIndex),
time_q=schedule.timeQ,
q=schedule.q,
n_sections=len(schedule.timeQ),
)
return np.concatenate(
[
np.asarray(enc.x_sched, dtype=np.float32).reshape(-1),
np.asarray(enc.x_sec, dtype=np.float32).reshape(-1),
],
axis=0,
schedule,
family_name=family_name,
).astype(np.float32)
@ -357,7 +372,7 @@ def run_solver_and_extract_curve(
params: Params,
well_index: int,
timeout: int,
) -> tuple[np.ndarray, dict]:
) -> tuple[np.ndarray, np.ndarray, dict]:
"""调用 C++ 求解器运行一次正演,并把双对数输出重采样为模型曲线向量。"""
ok = runner.run_simulation(params, timeout=timeout, override_schedule=params.schedule, include_schedule=True)
result = read_result_bin(runner.result_bin) if runner.result_bin.exists() else None
@ -382,7 +397,7 @@ def run_solver_and_extract_curve(
if not valid:
raise RuntimeError(f"curve_invalid_{reason}")
curve_feat = resample_curve_to_features(cfg, t_clean, p_clean, d_clean)
curve_feat, curve_time = resample_curve_to_features_with_time(cfg, t_clean, p_clean, d_clean)
raw = {
"t": t_clean.tolist(),
"p": p_clean.tolist(),
@ -390,7 +405,7 @@ def run_solver_and_extract_curve(
"n_steps": int(result["nSteps"]),
"n_wells": int(result["nWells"]),
}
return curve_feat, raw
return curve_feat, curve_time, raw
def params_to_array(params: Params) -> np.ndarray:
@ -570,11 +585,11 @@ def select_objective_stratified_indices(objectives: np.ndarray, k: int, objectiv
def select_multiscale_rows(
rows: list[tuple[np.ndarray, np.ndarray, dict[str, float], float]],
rows: list[tuple[np.ndarray, np.ndarray, dict[str, float], float, np.ndarray]],
k: int,
objective_bins: int,
span_fracs: list[float],
) -> list[tuple[np.ndarray, np.ndarray, dict[str, float], float]]:
) -> list[tuple[np.ndarray, np.ndarray, dict[str, float], float, np.ndarray]]:
"""从不同扰动尺度的候选中挑选代表性行,形成多尺度邻域数据。"""
if len(rows) <= k or len(span_fracs) <= 1:
indices = select_objective_stratified_indices(
@ -659,7 +674,9 @@ def create_output_file(
# anchor_* 保存中心样本neighbor_* 保存围绕该中心扰动后的候选及其真实目标函数。
f.create_dataset("anchor_params", shape=(0, param_dim), maxshape=(None, param_dim), dtype=np.float32, chunks=(64, param_dim))
f.create_dataset("anchor_schedule", shape=(0, schedule_dim), maxshape=(None, schedule_dim), dtype=np.float32, chunks=(64, schedule_dim))
n_time_points = int(curve_dim // 2)
f.create_dataset("anchor_curve", shape=(0, curve_dim), maxshape=(None, curve_dim), dtype=np.float32, chunks=(64, curve_dim))
f.create_dataset("anchor_curve_time", shape=(0, n_time_points), maxshape=(None, n_time_points), dtype=np.float32, chunks=(64, n_time_points))
f.create_dataset("anchor_schedule_meta", shape=(0, len(SCHEDULE_META_NAMES)), maxshape=(None, len(SCHEDULE_META_NAMES)), dtype=np.float32, chunks=(64, len(SCHEDULE_META_NAMES)))
f.create_dataset("anchor_family_name", shape=(0,), maxshape=(None,), dtype=h5py.string_dtype(encoding="utf-8"), chunks=(64,))
f.create_dataset("anchor_section_index", shape=(0,), maxshape=(None,), dtype=np.int32, chunks=(64,))
@ -669,6 +686,7 @@ def create_output_file(
f.create_dataset("neighbor_anchor_id", shape=(0,), maxshape=(None,), dtype=np.int32, chunks=(256,))
f.create_dataset("neighbor_params", shape=(0, param_dim), maxshape=(None, param_dim), dtype=np.float32, chunks=(256, param_dim))
f.create_dataset("neighbor_curve", shape=(0, curve_dim), maxshape=(None, curve_dim), dtype=np.float32, chunks=(256, curve_dim))
f.create_dataset("neighbor_curve_time", shape=(0, n_time_points), maxshape=(None, n_time_points), dtype=np.float32, chunks=(256, n_time_points))
f.create_dataset("neighbor_objective", shape=(0,), maxshape=(None,), dtype=np.float32, chunks=(256,))
f.create_dataset("neighbor_objective_p", shape=(0,), maxshape=(None,), dtype=np.float32, chunks=(256,))
f.create_dataset("neighbor_objective_d", shape=(0,), maxshape=(None,), dtype=np.float32, chunks=(256,))
@ -683,6 +701,7 @@ def append_anchor(
anchor_params: np.ndarray,
schedule_vec: np.ndarray,
curve: np.ndarray,
curve_time: np.ndarray,
schedule_meta: np.ndarray,
family_name: str,
section_index: int,
@ -696,6 +715,7 @@ def append_anchor(
("anchor_params", anchor_params.reshape(1, -1)),
("anchor_schedule", schedule_vec.reshape(1, -1)),
("anchor_curve", curve.reshape(1, -1)),
("anchor_curve_time", curve_time.reshape(1, -1)),
("anchor_schedule_meta", schedule_meta.reshape(1, -1)),
]:
ds = f[name]
@ -719,6 +739,7 @@ def append_neighbors(
anchor_id: int,
params_list: list[np.ndarray],
curve_list: list[np.ndarray],
curve_time_list: list[np.ndarray],
obj_list: list[dict[str, float]],
span_frac_list: list[float],
) -> None:
@ -742,6 +763,7 @@ def append_neighbors(
resize_1d("neighbor_anchor_id")[start:end] = int(anchor_id)
resize_2d("neighbor_params")[start:end] = np.stack(params_list, axis=0).astype(np.float32)
resize_2d("neighbor_curve")[start:end] = np.stack(curve_list, axis=0).astype(np.float32)
resize_2d("neighbor_curve_time")[start:end] = np.stack(curve_time_list, axis=0).astype(np.float32)
resize_1d("neighbor_objective")[start:end] = np.asarray([x["dual_log_objective"] for x in obj_list], dtype=np.float32)
resize_1d("neighbor_objective_p")[start:end] = np.asarray([x["log_pressure_objective"] for x in obj_list], dtype=np.float32)
resize_1d("neighbor_objective_d")[start:end] = np.asarray([x["log_derivative_objective"] for x in obj_list], dtype=np.float32)
@ -758,6 +780,8 @@ def main() -> None:
span_fracs = resolve_span_fracs(args)
schedule_dim = int(np.prod(cfg.schedule_grid_shape)) + int(cfg.sec_feat_dim)
if use_schedule_meta_features(cfg):
schedule_dim += len(schedule_meta_feature_names(cfg))
curve_dim = int(cfg.curve_dim)
param_dim = len(cfg.raw["params"]["all_physical_param_names"])
local_search_names = search_param_names(cfg)
@ -809,7 +833,7 @@ def main() -> None:
use_server=bool(args.use_runner_server),
)
try:
anchor_curve, _ = run_solver_and_extract_curve(
anchor_curve, anchor_curve_time, _ = run_solver_and_extract_curve(
runner=anchor_runner,
cfg=cfg,
params=anchor_params,
@ -822,12 +846,17 @@ def main() -> None:
anchor_runner.close()
continue
schedule_vec = build_schedule_vector(cfg, anchor_params.schedule)
schedule_vec = build_schedule_vector(
cfg,
anchor_params.schedule,
family_name=family_name,
)
anchor_id = append_anchor(
f=f,
anchor_params=params_to_array(anchor_params),
schedule_vec=schedule_vec,
curve=anchor_curve,
curve_time=anchor_curve_time,
schedule_meta=schedule_meta,
family_name=family_name,
section_index=int(anchor_params.schedule.sectionIndex),
@ -841,7 +870,9 @@ def main() -> None:
f"use_server={bool(args.use_runner_server)}"
)
valid_neighbor_rows: list[tuple[np.ndarray, np.ndarray, dict[str, float], float]] = []
valid_neighbor_rows: list[
tuple[np.ndarray, np.ndarray, dict[str, float], float, np.ndarray]
] = []
valid_span_counter: Counter[str] = Counter()
max_attempts = max(
int(args.neighbors_per_anchor),
@ -849,9 +880,13 @@ def main() -> None:
) * len(span_fracs)
span_targets = build_span_targets(int(args.neighbors_per_anchor), span_fracs)
span_stop_targets = {
float(span_frac): max(int(target), int(target) * max(int(args.objective_bins), 1))
float(span_frac): int(target)
for span_frac, target in span_targets.items()
}
total_stop_target = max(
int(args.neighbors_per_anchor),
int(np.ceil(float(args.neighbors_per_anchor) * float(args.early_stop_valid_factor))),
)
for attempt in range(max_attempts):
span_frac = float(span_fracs[attempt % len(span_fracs)])
@ -863,19 +898,27 @@ def main() -> None:
max_perturbed_dims=int(args.max_perturbed_dims),
)
try:
cand_curve, _ = run_solver_and_extract_curve(
cand_curve, cand_curve_time, _ = run_solver_and_extract_curve(
runner=anchor_runner,
cfg=cfg,
params=cand,
well_index=int(args.well_index),
timeout=int(args.solver_timeout),
)
n_time_points = curve_dim // 2
obj = dual_log_objective(anchor_curve, cand_curve, {"parts": [
{"name": "log_pressure", "start": 0, "end": curve_dim // 3},
{"name": "log_derivative", "start": curve_dim // 3, "end": 2 * curve_dim // 3},
{"name": "slope", "start": 2 * curve_dim // 3, "end": curve_dim},
{"name": "log_pressure", "start": 0, "end": n_time_points},
{"name": "log_derivative", "start": n_time_points, "end": curve_dim},
]})
valid_neighbor_rows.append((params_to_array(cand), cand_curve.astype(np.float32), obj, span_frac))
valid_neighbor_rows.append(
(
params_to_array(cand),
cand_curve.astype(np.float32),
obj,
span_frac,
cand_curve_time.astype(np.float32),
)
)
summary["neighbor_span_counter_valid"][f"{span_frac:g}"] += 1
valid_span_counter[f"{span_frac:g}"] += 1
except Exception as exc:
@ -891,6 +934,13 @@ def main() -> None:
f"valid={len(valid_neighbor_rows)} per_span={dict(sorted(valid_span_counter.items()))}"
)
break
if len(valid_neighbor_rows) >= total_stop_target:
print(
f"[anchor {anchor_id:03d}] early-stop-total attempts={attempt + 1}/{max_attempts} "
f"valid={len(valid_neighbor_rows)} target={total_stop_target} "
f"per_span={dict(sorted(valid_span_counter.items()))}"
)
break
if (attempt + 1) % max(12, len(span_fracs) * 4) == 0:
print(
@ -909,6 +959,7 @@ def main() -> None:
neighbor_curve_rows = [x[1] for x in selected_rows]
neighbor_obj_rows = [x[2] for x in selected_rows]
neighbor_span_rows = [float(x[3]) for x in selected_rows]
neighbor_curve_time_rows = [x[4] for x in selected_rows]
for span_frac in neighbor_span_rows:
summary["neighbor_span_counter_kept"][f"{span_frac:g}"] += 1
@ -918,6 +969,7 @@ def main() -> None:
anchor_id=anchor_id,
params_list=neighbor_params_rows,
curve_list=neighbor_curve_rows,
curve_time_list=neighbor_curve_time_rows,
obj_list=neighbor_obj_rows,
span_frac_list=neighbor_span_rows,
)

@ -17,7 +17,7 @@ ROOT = Path(__file__).resolve().parents[1]
sys.path.append(str(ROOT))
from src.common.experiment_paths import normalize_tag, processed_path_for_tag
from src.data.preprocess import preprocess_dataset
from src.data.preprocess import migrate_processed_dataset_without_slope, preprocess_dataset
def main() -> None:
@ -29,7 +29,7 @@ def main() -> None:
"--input",
type=str,
required=True,
help="Path to the generated .h5 dataset",
help="Path to generated .h5 dataset or legacy processed .pkl",
)
parser.add_argument(
"--output",
@ -61,14 +61,18 @@ def main() -> None:
else processed_path_for_tag(tag)
)
preprocess_dataset(
input_path=Path(args.input),
output_path=output_path,
test_size=args.test_size,
val_size=args.val_size,
random_seed=args.seed,
use_param_feature_transform=not args.no_param_feature_transform,
)
input_path = Path(args.input)
if input_path.suffix.lower() == ".pkl":
migrate_processed_dataset_without_slope(input_path, output_path)
else:
preprocess_dataset(
input_path=input_path,
output_path=output_path,
test_size=args.test_size,
val_size=args.val_size,
random_seed=args.seed,
use_param_feature_transform=not args.no_param_feature_transform,
)
if __name__ == "__main__":

@ -411,6 +411,7 @@ def main() -> None:
params=target_params,
well_index=int(args.well_index),
timeout=int(args.solver_timeout),
target_curve_dim=int(processed["meta"]["curve_dim"]),
)
except Exception as exc:
failed_case_rows.append(
@ -445,6 +446,7 @@ def main() -> None:
params=cand,
well_index=int(args.well_index),
timeout=int(args.solver_timeout),
target_curve_dim=int(processed["meta"]["curve_dim"]),
)
pred_curve = predict_surrogate_curve(
processed=processed,

@ -34,7 +34,7 @@ from src.common.experiment_paths import (
normalize_tag,
processed_path_for_tag,
)
from src.data.curve_processing import clean_curve_for_dataset, is_valid_curve, resample_curve_to_features
from src.data.curve_processing import clean_curve_for_dataset, is_valid_curve, resample_curve_to_model_features
from src.data.params import Params, Schedule
from src.evaluation.autofit_objective import dual_log_objective
@ -151,10 +151,8 @@ def build_target_curve_from_meta(cfg: Config, processed: dict, meta: dict) -> np
if not valid:
raise RuntimeError(f"Target loglog curve from meta is invalid: {reason}")
curve = resample_curve_to_features(cfg, t_clean, p_clean, d_clean)
curve_dim = int(processed["meta"]["curve_dim"])
if curve.size != curve_dim:
raise RuntimeError(f"Target curve dim mismatch after resample: {curve.size} != {curve_dim}")
curve = resample_curve_to_model_features(cfg, t_clean, p_clean, d_clean, curve_dim)
return curve.astype(np.float32)

@ -29,7 +29,7 @@ from src.common.experiment_paths import (
normalize_tag,
processed_path_for_tag,
)
from src.data.curve_processing import clean_curve_for_dataset, is_valid_curve, resample_curve_to_features
from src.data.curve_processing import clean_curve_for_dataset, is_valid_curve, resample_curve_to_model_features
from src.data.params import Params, Schedule
from src.evaluation.autofit_objective import dual_log_objective
@ -114,10 +114,8 @@ def build_target_curve_from_meta(cfg: Config, processed: dict, meta: dict) -> np
valid, reason = is_valid_curve(cfg, t_clean, p_clean, d_clean)
if not valid:
raise RuntimeError(f"Target loglog curve from meta is invalid: {reason}")
curve = resample_curve_to_features(cfg, t_clean, p_clean, d_clean)
curve_dim = int(processed["meta"]["curve_dim"])
if curve.size != curve_dim:
raise RuntimeError(f"Target curve dim mismatch after resample: {curve.size} != {curve_dim}")
curve = resample_curve_to_model_features(cfg, t_clean, p_clean, d_clean, curve_dim)
return curve.astype(np.float32)

@ -55,7 +55,6 @@ def main() -> None:
parser.add_argument("--w-pressure", type=float, default=1.0)
parser.add_argument("--w-derivative", type=float, default=2.0)
parser.add_argument("--w-slope", type=float, default=0.0)
parser.add_argument("--w-bias-pressure", type=float, default=0.15)
parser.add_argument("--w-bias-derivative", type=float, default=0.05)
parser.add_argument("--w-derivative-shape", type=float, default=0.10)
@ -102,7 +101,6 @@ def main() -> None:
weights=LossWeights(
pressure=args.w_pressure,
derivative=args.w_derivative,
slope=args.w_slope,
bias_pressure=args.w_bias_pressure,
bias_derivative=args.w_bias_derivative,
derivative_shape=args.w_derivative_shape,

@ -1,156 +0,0 @@
"""训练多个随机种子的正演代理模型集成。
脚本基于同一份 processed 数据和训练超参数循环启动多次 `train_forward`每个成员使用
独立 seed 和输出目录所得模型集合用于后续不确定性估计误差风险分析和 fallback 筛选
"""
# pylint: disable=import-error,wrong-import-position,duplicate-code
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
sys.path.append(str(ROOT))
from src.common.experiment_paths import normalize_tag, processed_path_for_tag
from src.training.train_forward import (
LossConfig,
LossWeights,
ModelConfig,
OptimConfig,
SampleReweightConfig,
TrainConfig,
TrainRuntime,
train_forward,
)
def parse_seed_list(seed_text: str) -> list[int]:
"""解析逗号分隔的随机种子列表,用于训练或评估模型集成。"""
seeds = []
for item in str(seed_text).split(","):
item = item.strip()
if not item:
continue
seeds.append(int(item))
if not seeds:
raise ValueError("至少需要一个 seed")
return seeds
def default_output_root(tag: str | None, use_schedule: bool) -> Path:
"""根据实验标签生成集成训练的默认模型输出根目录。"""
suffix = "" if use_schedule else "_no_schedule"
if tag:
return Path("models") / f"forward_surrogate_{tag}_ensemble{suffix}"
return Path("models") / f"forward_surrogate_ensemble{suffix}"
def main() -> None:
"""按多个随机种子重复训练正演代理模型,并写出集成清单。"""
parser = argparse.ArgumentParser(description="Train a deep-ensemble forward surrogate for UQ")
parser.add_argument("--processed", type=str, default=None, help="Processed dataset path")
parser.add_argument("--tag", type=str, default=None, help="Experiment tag for auto naming")
parser.add_argument("--output-root", type=str, default=None, help="Optional ensemble root directory")
parser.add_argument("--seeds", type=str, default="41,42,43", help="Comma-separated seed list")
parser.add_argument("--batch-size", type=int, default=256)
parser.add_argument("--epochs", type=int, default=220)
parser.add_argument("--lr", type=float, default=0.001)
parser.add_argument("--weight-decay", type=float, default=0.0005)
parser.add_argument("--hidden-dim", type=int, default=256)
parser.add_argument("--dropout", type=float, default=0.1)
parser.add_argument("--w-pressure", type=float, default=1.0)
parser.add_argument("--w-derivative", type=float, default=2.0)
parser.add_argument("--w-slope", type=float, default=0.0)
parser.add_argument("--w-bias-pressure", type=float, default=0.15)
parser.add_argument("--w-bias-derivative", type=float, default=0.05)
parser.add_argument("--w-derivative-shape", type=float, default=0.10)
parser.add_argument("--w-autofit-pressure", type=float, default=0.0)
parser.add_argument("--w-autofit-derivative", type=float, default=0.0)
parser.add_argument("--huber-beta", type=float, default=0.05)
parser.add_argument("--use-sample-reweight", action="store_true", default=True)
parser.add_argument("--no-sample-reweight", action="store_false", dest="use_sample_reweight")
parser.add_argument("--sample-reweight-alpha", type=float, default=0.4)
parser.add_argument("--sample-weight-min", type=float, default=1.0)
parser.add_argument("--sample-weight-max", type=float, default=2.5)
parser.add_argument("--no-schedule", action="store_true")
args = parser.parse_args()
tag = normalize_tag(args.tag)
use_schedule = not args.no_schedule
seeds = parse_seed_list(args.seeds)
processed_path = Path(args.processed) if args.processed is not None else processed_path_for_tag(tag)
output_root = Path(args.output_root) if args.output_root is not None else default_output_root(tag, use_schedule)
output_root.mkdir(parents=True, exist_ok=True)
# manifest 记录每个成员模型的 seed、checkpoint 和 metrics供不确定性评估脚本批量加载。
manifest = {
"tag": tag,
"processed_path": str(processed_path),
"use_schedule": use_schedule,
"seeds": seeds,
"members": [],
}
for seed in seeds:
member_dir = output_root / f"seed_{seed}"
print(f"\n=== Training ensemble member seed={seed} -> {member_dir} ===")
# 除随机种子和输出目录外,各成员使用相同数据和损失权重,形成深度集成。
cfg = TrainConfig(
processed_path=processed_path,
output_dir=member_dir,
runtime=TrainRuntime(seed=seed),
optim=OptimConfig(
batch_size=args.batch_size,
epochs=args.epochs,
lr=args.lr,
weight_decay=args.weight_decay,
),
model=ModelConfig(
hidden_dim=args.hidden_dim,
dropout=args.dropout,
use_schedule=use_schedule,
),
loss=LossConfig(
weights=LossWeights(
pressure=args.w_pressure,
derivative=args.w_derivative,
slope=args.w_slope,
bias_pressure=args.w_bias_pressure,
bias_derivative=args.w_bias_derivative,
derivative_shape=args.w_derivative_shape,
autofit_pressure=args.w_autofit_pressure,
autofit_derivative=args.w_autofit_derivative,
),
huber_beta=args.huber_beta,
),
sample_reweight=SampleReweightConfig(
enabled=args.use_sample_reweight,
alpha=args.sample_reweight_alpha,
weight_min=args.sample_weight_min,
weight_max=args.sample_weight_max,
),
)
train_forward(cfg)
manifest["members"].append(
{
"seed": seed,
"dir": str(member_dir),
"checkpoint": str(member_dir / "forward_surrogate_best.pt"),
"metrics": str(member_dir / "metrics.json"),
}
)
with open(output_root / "ensemble_manifest.json", "w", encoding="utf-8") as f:
json.dump(manifest, f, ensure_ascii=False, indent=2)
print("\nEnsemble training complete.")
print(f"Manifest saved: {output_root / 'ensemble_manifest.json'}")
if __name__ == "__main__":
main()

@ -1,104 +0,0 @@
"""训练时间条件正演代理模型。
脚本把预处理数据展开为按时间点监督的训练任务并将命令行参数封装为
`TimeConditionedTrainConfig`模型学习在给定参数制度和时间特征时预测压力/导数
适合处理可变时间采样或逐点推理场景
"""
# pylint: disable=import-error,wrong-import-position
from __future__ import annotations
import argparse
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
sys.path.append(str(ROOT))
from src.common.experiment_paths import normalize_tag, processed_path_for_tag
from src.training.train_time_conditioned import (
RiskWeightConfig,
TimeConditionedTrainConfig,
TimeLossConfig,
TimeModelConfig,
TimeOptimConfig,
TimeRuntimeConfig,
train_time_conditioned,
)
def main() -> None:
"""读取训练参数并训练按时间点展开的时间条件代理模型。"""
parser = argparse.ArgumentParser(description="Train a time-conditioned point-wise forward surrogate")
parser.add_argument("--processed", type=str, default=None)
parser.add_argument("--tag", type=str, default=None)
parser.add_argument("--output-dir", type=str, default=None)
parser.add_argument("--seed", type=int, default=42)
parser.add_argument("--batch-size", type=int, default=4096)
parser.add_argument("--epochs", type=int, default=120)
parser.add_argument("--lr", type=float, default=1.0e-3)
parser.add_argument("--weight-decay", type=float, default=1.0e-4)
parser.add_argument("--hidden-dim", type=int, default=256)
parser.add_argument("--n-blocks", type=int, default=4)
parser.add_argument("--dropout", type=float, default=0.05)
parser.add_argument("--w-pressure", type=float, default=1.0)
parser.add_argument("--w-derivative", type=float, default=2.0)
parser.add_argument("--huber-beta", type=float, default=0.05)
parser.add_argument("--no-schedule", action="store_true")
parser.add_argument(
"--sample-weight-mode",
choices=["none", "risk_region"],
default="none",
help="Optional risk-region sample weighting; default keeps the original unweighted training behavior",
)
parser.add_argument("--risk-weight", type=float, default=2.5)
parser.add_argument("--skin-lt-minus8-weight", type=float, default=3.5)
parser.add_argument("--sample-weight-min", type=float, default=1.0)
parser.add_argument("--sample-weight-max", type=float, default=4.0)
args = parser.parse_args()
tag = normalize_tag(args.tag)
processed_path = Path(args.processed) if args.processed is not None else processed_path_for_tag(tag)
# 时间条件模型使用独立目录,避免覆盖固定长度曲线代理模型的 checkpoint。
if args.output_dir is not None:
output_dir = Path(args.output_dir)
elif tag:
output_dir = Path("models") / f"time_conditioned_surrogate_{tag}"
else:
output_dir = Path("models") / "time_conditioned_surrogate"
cfg = TimeConditionedTrainConfig(
processed_path=processed_path,
output_dir=output_dir,
runtime=TimeRuntimeConfig(seed=int(args.seed)),
optim=TimeOptimConfig(
batch_size=int(args.batch_size),
epochs=int(args.epochs),
lr=float(args.lr),
weight_decay=float(args.weight_decay),
),
model=TimeModelConfig(
hidden_dim=int(args.hidden_dim),
n_blocks=int(args.n_blocks),
dropout=float(args.dropout),
use_schedule=not bool(args.no_schedule),
),
loss=TimeLossConfig(
w_pressure=float(args.w_pressure),
w_derivative=float(args.w_derivative),
huber_beta=float(args.huber_beta),
),
risk_weight=RiskWeightConfig(
mode=str(args.sample_weight_mode),
risk_weight=float(args.risk_weight),
skin_lt_minus8_weight=float(args.skin_lt_minus8_weight),
weight_min=float(args.sample_weight_min),
weight_max=float(args.sample_weight_max),
),
)
train_time_conditioned(cfg)
if __name__ == "__main__":
main()

@ -26,7 +26,7 @@ import torch
from src.common.config import Config
from src.common.experiment_paths import config_for_stage, model_checkpoint_for_tag, normalize_tag, processed_path_for_tag
from src.data.curve_processing import clean_curve_for_dataset, is_valid_curve, resample_curve_to_features
from src.data.curve_processing import clean_curve_for_dataset, is_valid_curve, resample_curve_to_model_features
from src.data.param_features import param_feature_transform_from_meta, transform_param_features
from src.data.params import Params, Schedule
from src.data.runner_client import CppRunner, read_result_bin
@ -87,17 +87,18 @@ def parse_args() -> argparse.Namespace:
def infer_curve_layout(meta: dict, curve_dim: int) -> dict:
"""从元数据读取曲线分段布局;旧数据没有布局时按压力/导数/斜率三等分回退"""
"""从元数据读取布局;缺失时兼容推断双通道或旧三通道布局"""
curve_layout = meta.get("curve_layout")
if curve_layout is not None:
return curve_layout
n_time_points = curve_dim // 3
n_parts = 3 if curve_dim % 3 == 0 else 2
n_time_points = curve_dim // n_parts
names = ["log_pressure", "log_derivative"] + (["slope"] if n_parts == 3 else [])
return {
"n_time_points": int(n_time_points),
"parts": [
{"name": "log_pressure", "start": 0, "end": n_time_points},
{"name": "log_derivative", "start": n_time_points, "end": 2 * n_time_points},
{"name": "slope", "start": 2 * n_time_points, "end": 3 * n_time_points},
{"name": name, "start": i * n_time_points, "end": (i + 1) * n_time_points}
for i, name in enumerate(names)
],
}
@ -218,6 +219,7 @@ def run_solver_and_extract_curve(
params: Params,
well_index: int,
timeout: int,
target_curve_dim: int,
) -> tuple[np.ndarray, dict]:
"""调用 C++ 求解器运行一次正演,并把双对数输出重采样为模型曲线向量。"""
ok = runner.run_simulation(params, timeout=timeout, override_schedule=params.schedule, include_schedule=True)
@ -243,7 +245,9 @@ def run_solver_and_extract_curve(
if not valid:
raise RuntimeError(f"Curve failed validation: {reason}")
curve_feat = resample_curve_to_features(cfg, t_clean, p_clean, d_clean)
curve_feat = resample_curve_to_model_features(
cfg, t_clean, p_clean, d_clean, target_curve_dim
)
raw = {
"t": t_clean.tolist(),
"p": p_clean.tolist(),
@ -429,6 +433,7 @@ def main() -> None:
params=target_params,
well_index=args.well_index,
timeout=args.solver_timeout,
target_curve_dim=int(processed["meta"]["curve_dim"]),
)
if not args.reuse_runner:
@ -458,6 +463,7 @@ def main() -> None:
params=cand,
well_index=args.well_index,
timeout=args.solver_timeout,
target_curve_dim=int(processed["meta"]["curve_dim"]),
)
pred_curve = predict_surrogate_curve(
processed=processed,

@ -240,6 +240,7 @@ def main() -> None:
params=target_params,
well_index=int(args.well_index),
timeout=int(args.solver_timeout),
target_curve_dim=int(processed["meta"]["curve_dim"]),
)
except Exception as exc:
failed_targets.append({"attempt_id": attempt, "reason": str(exc)})
@ -279,6 +280,7 @@ def main() -> None:
params=cand,
well_index=int(args.well_index),
timeout=int(args.solver_timeout),
target_curve_dim=int(processed["meta"]["curve_dim"]),
)
pred_curve = predict_surrogate_curve(
processed=processed,

@ -87,8 +87,7 @@ class Config:
def curve_dim(self) -> int:
"""返回配置中的曲线输出维度。"""
n_time_points = int(self.get("curve_processing", "n_time_points", default=160))
use_slope = bool(self.get("curve_processing", "use_slope_feature", default=True))
return (2 + (1 if use_slope else 0)) * n_time_points
return 2 * n_time_points
@property
def sec_feat_dim(self) -> int:

@ -4,8 +4,8 @@
C++ 数值试井求解器输出的双对数曲线通常是不等长时间序列本模块负责在进入
机器学习数据集之前完成质量检查无效点过滤时间排序去重对数变换和重采样
输出曲线特征采用固定布局log_pressurelog_derivative以及可选的 slope
辅助通道按时间顺序拼接该布局会被预处理训练评估脚本共同使用因此这里
输出曲线特征采用固定布局log_pressurelog_derivative 按时间顺序拼接
该布局会被预处理训练评估脚本共同使用因此这里
的维度和顺序必须与 Config.curve_dimmeta.curve_layout 保持一致
"""
@ -167,15 +167,12 @@ def resample_curve_to_features_with_time(
"""把变长曲线转换为模型训练使用的固定长度特征。
输入曲线先转换为 log_pressure log_abs_derivative然后插值到几何时间网格
如果配置启用 slope 通道会额外计算 log_pressure log_time 的中心差分斜率
Returns:
tuple[np.ndarray, np.ndarray]: 第一个数组是按布局拼接的曲线特征形状为
[cfg.curve_dim]第二个数组是该样本使用的时间网格形状为 [n_time_points]
"""
eps = float(cfg.get("curve_processing", "feature_epsilon", default=1e-12))
n_time_points = int(cfg.get("curve_processing", "n_time_points", default=160))
use_slope = bool(cfg.get("curve_processing", "use_slope_feature", default=True))
t = np.asarray(t, dtype=np.float64).reshape(-1)
p = np.asarray(p, dtype=np.float64).reshape(-1)
@ -193,27 +190,43 @@ def resample_curve_to_features_with_time(
lp = np.interp(grid, t, logp).astype(np.float32)
ld = np.interp(grid, t, logd).astype(np.float32)
feats = [lp, ld]
if use_slope:
# slope 作为辅助特征保留;即使训练权重设为 0也能保持数据布局兼容。
logt = np.log(np.maximum(grid, eps)).astype(np.float64)
s = np.zeros((n_time_points,), dtype=np.float32)
if n_time_points >= 3:
denom = logt[2:] - logt[:-2]
denom = np.maximum(denom, 1e-12)
s[1:-1] = ((lp[2:] - lp[:-2]) / denom).astype(np.float32)
s[0] = s[1]
s[-1] = s[-2]
feats.append(s)
x = np.concatenate(feats, axis=0).astype(np.float32)
x = np.concatenate([lp, ld], axis=0).astype(np.float32)
if x.size != cfg.curve_dim:
raise RuntimeError(f"curve feature dim mismatch: got {x.size}, expect {cfg.curve_dim}")
return x, grid.astype(np.float32)
def resample_curve_to_features(cfg: Config, t: np.ndarray, p: np.ndarray, d: np.ndarray) -> np.ndarray:
"""将不等长双对数曲线插值为固定长度的压力、导数和斜率拼接向量。"""
"""将不等长双对数曲线插值为固定长度的压力和导数拼接向量。"""
features, _ = resample_curve_to_features_with_time(cfg, t, p, d)
return features
def resample_curve_to_model_features(
cfg: Config,
t: np.ndarray,
p: np.ndarray,
d: np.ndarray,
target_curve_dim: int,
) -> np.ndarray:
"""按模型维度重采样;仅为旧 480 维模型临时补出 slope 通道。"""
features, grid = resample_curve_to_features_with_time(cfg, t, p, d)
target_curve_dim = int(target_curve_dim)
if features.size == target_curve_dim:
return features
n_time_points = grid.size
if target_curve_dim != features.size + n_time_points:
raise RuntimeError(
f"无法把 {features.size} 维双通道曲线适配为模型要求的 {target_curve_dim}"
)
lp = features[:n_time_points]
logt = np.log(np.maximum(grid.astype(np.float64), 1e-12))
slope = np.zeros((n_time_points,), dtype=np.float32)
if n_time_points >= 3:
denom = np.maximum(logt[2:] - logt[:-2], 1e-12)
slope[1:-1] = ((lp[2:] - lp[:-2]) / denom).astype(np.float32)
slope[0] = slope[1]
slope[-1] = slope[-2]
return np.concatenate([features, slope], axis=0).astype(np.float32)

@ -473,7 +473,7 @@ def _worker_simulate_parallel(args):
if not ok:
return task_idx, None, reason, None
# 模型监督目标固定为 pressure/derivative/slope 三段拼接向量,同时保留时间坐标。
# 模型监督目标固定为 pressure/derivative段拼接向量,同时保留时间坐标。
curve_feat, curve_time = resample_curve_to_features_with_time(cfg, t, p_curve, d_curve)
sec = int(np.clip(int(sch.sectionIndex), 1, max(len(sch.timeQ), 1)))
@ -549,7 +549,7 @@ class HDF5Appender:
self.param_dim = int(param_dim)
self.schedule_dim = int(schedule_dim)
self.curve_dim = int(curve_dim)
self.time_dim = int(curve_dim) // 3
self.time_dim = int(curve_dim) // 2
self.f = h5py.File(self.filepath, "w")
self._n = 0

@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
"""原始 HDF5 样本预处理。
数据生成阶段写出的 HDF5 文件仍处在原始样本状态参数尚未做特征变换输入和
@ -12,6 +12,7 @@ payload 中保留 curve_layout、参数变换配置、流量制度元数据和
from __future__ import annotations
import copy
from pathlib import Path
import h5py
@ -21,7 +22,6 @@ from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from src.data.param_features import build_param_feature_transform, transform_param_features
from src.data.time_features import build_default_curve_time, build_time_point_features
@ -64,6 +64,97 @@ def _validate_optional_length(name: str, arr: np.ndarray | None, expected_n: int
raise ValueError(f"{name} row count does not match main arrays: {len(arr)} != {expected_n}")
def _curve_keep_indices(meta: dict, curve_dim: int) -> np.ndarray:
"""返回旧 processed 曲线中 pressure/derivative 两段的列索引。"""
layout = meta.get("curve_layout") or {}
parts = {str(part["name"]): part for part in layout.get("parts", [])}
if "log_pressure" in parts and "log_derivative" in parts:
indices = []
for name in ("log_pressure", "log_derivative"):
part = parts[name]
indices.extend(range(int(part["start"]), int(part["end"])))
return np.asarray(indices, dtype=np.int64)
if curve_dim % 3 != 0:
raise ValueError(
"processed 数据没有 curve_layout且曲线维度不能按旧版三通道布局识别"
)
n_time_points = curve_dim // 3
return np.arange(2 * n_time_points, dtype=np.int64)
def _slice_standard_scaler(scaler: StandardScaler, keep_indices: np.ndarray) -> StandardScaler:
"""裁剪按列独立拟合的 StandardScaler同时保留其拟合状态。"""
result = copy.deepcopy(scaler)
for name in ("mean_", "scale_", "var_"):
value = getattr(result, name, None)
if value is not None:
setattr(result, name, np.asarray(value)[keep_indices].copy())
result.n_features_in_ = int(keep_indices.size)
return result
def migrate_processed_dataset_without_slope(input_path: Path, output_path: Path) -> None:
"""把旧三通道 processed pkl 精确迁移为 pressure/derivative 双通道数据。"""
if not input_path.exists():
raise FileNotFoundError(f"Input file not found: {input_path}")
source = joblib.load(input_path)
required = ["Y_curve_train", "Y_curve_val", "Y_curve_test", "scaler_curve", "meta"]
missing = [name for name in required if name not in source]
if missing:
raise KeyError(f"processed 数据缺少字段: {missing}")
curve_dim = int(source["Y_curve_train"].shape[1])
keep_indices = _curve_keep_indices(source.get("meta", {}), curve_dim)
if keep_indices.size == curve_dim:
raise ValueError("输入 processed 数据已经不包含 slope无需迁移")
if keep_indices.size % 2 != 0:
raise ValueError(f"保留后的曲线维度必须能被 2 整除,当前为 {keep_indices.size}")
payload = dict(source)
for split in ("train", "val", "test"):
key = f"Y_curve_{split}"
payload[key] = np.asarray(source[key][:, keep_indices], dtype=np.float32)
payload["scaler_curve"] = _slice_standard_scaler(source["scaler_curve"], keep_indices)
# 这些字段属于已停用的 time-conditioned 路径,不进入旧正演模型训练包。
for key in (
"T_curve_train", "T_curve_val", "T_curve_test",
"X_time_train", "X_time_val", "X_time_test", "scaler_time",
):
payload.pop(key, None)
n_time_points = int(keep_indices.size // 2)
meta = dict(source.get("meta", {}))
meta.update(
{
"curve_dim": int(keep_indices.size),
"raw_curve_dim": curve_dim,
"dropped_legacy_slope": True,
"migrated_from_processed": str(input_path),
"curve_layout": {
"n_time_points": n_time_points,
"parts": [
{"name": "log_pressure", "start": 0, "end": n_time_points},
{"name": "log_derivative", "start": n_time_points, "end": 2 * n_time_points},
],
},
}
)
meta.pop("time_feature_dim", None)
meta.pop("curve_time_source", None)
payload["meta"] = meta
output_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(payload, output_path)
print(f"Processed migration complete, saved to: {output_path}")
print(
f"train={len(payload['Y_curve_train'])}, val={len(payload['Y_curve_val'])}, "
f"test={len(payload['Y_curve_test'])}, curve_dim={curve_dim}->{keep_indices.size}"
)
def preprocess_dataset(
input_path: Path,
output_path: Path,
@ -75,9 +166,8 @@ def preprocess_dataset(
"""将原始 HDF5 样本整理为训练脚本可直接加载的 joblib 数据包。
处理步骤包括读取 params/schedule/curve 等核心数组保留可选元数据构造参数
特征变换划分 train/val/test只在训练集上拟合 StandardScaler并生成时间条件
模型需要的逐点时间特征输出 payload 同时保存 scaler meta确保后续评估可以
把标准化预测恢复到原始曲线尺度并按 curve_layout 正确拆分压力导数和 slope
特征变换划分 train/val/test并只在训练集上拟合 StandardScaler输出 payload
同时保存 scaler meta确保后续评估可以恢复原始尺度并正确拆分压力和导数
"""
if not input_path.exists():
raise FileNotFoundError(f"Input file not found: {input_path}")
@ -91,13 +181,6 @@ def preprocess_dataset(
x_params = _ensure_2d("params", f["params"][:])
x_schedule = _ensure_2d("schedule", f["schedule"][:])
y_curve = _ensure_2d("curve", f["curve"][:])
if "curve_time" in f:
curve_time = _ensure_2d("curve_time", f["curve_time"][:])
curve_time_source = "saved_curve_time"
else:
curve_time = None
curve_time_source = "synthetic_unit_time"
# 可选元数据会保留下来,供后续分析和排序验证使用;
# 基础正演训练循环不依赖这些字段。
param_names = _decode_attr_list(f.attrs.get("param_names"))
@ -126,11 +209,19 @@ def preprocess_dataset(
if len(x_schedule) != n or len(y_curve) != n:
raise ValueError("params / schedule / curve row counts do not match")
raw_curve_dim = int(y_curve.shape[1])
if raw_curve_dim % 3 == 0:
# 兼容已有的 pressure/derivative/slope HDF5预处理时丢弃未参与训练的 slope。
n_time_points = raw_curve_dim // 3
y_curve = y_curve[:, : 2 * n_time_points]
elif raw_curve_dim % 2 == 0:
n_time_points = raw_curve_dim // 2
else:
raise ValueError(
f"curve 维度 {raw_curve_dim} 无法识别;期望 2*N压力/导数)或 "
"3*N旧版压力/导数/slope"
)
curve_dim = int(y_curve.shape[1])
n_time_points = curve_dim // 3
if curve_time is None:
curve_time = build_default_curve_time(n, n_time_points)
_validate_optional_length("curve_time", curve_time, n)
param_feature_transform = build_param_feature_transform(
param_names=param_names,
@ -170,17 +261,14 @@ def preprocess_dataset(
x_params_train = x_params_features[idx_train]
x_schedule_train = x_schedule[idx_train]
y_curve_train = y_curve[idx_train]
curve_time_train = curve_time[idx_train]
x_params_val = x_params_features[idx_val]
x_schedule_val = x_schedule[idx_val]
y_curve_val = y_curve[idx_val]
curve_time_val = curve_time[idx_val]
x_params_test = x_params_features[idx_test]
x_schedule_test = x_schedule[idx_test]
y_curve_test = y_curve[idx_test]
curve_time_test = curve_time[idx_test]
schedule_meta_train = schedule_meta[idx_train] if schedule_meta is not None else None
schedule_meta_val = schedule_meta[idx_val] if schedule_meta is not None else None
@ -222,26 +310,19 @@ def preprocess_dataset(
scaler_params = StandardScaler()
scaler_schedule = StandardScaler()
scaler_curve = StandardScaler()
scaler_time = StandardScaler()
# 只在训练集上拟合 scaler避免验证/测试信息泄漏。
x_params_train_s = scaler_params.fit_transform(x_params_train)
x_schedule_train_s = scaler_schedule.fit_transform(x_schedule_train)
y_curve_train_s = scaler_curve.fit_transform(y_curve_train)
x_time_train = build_time_point_features(curve_time_train)
x_time_train_s = scaler_time.fit_transform(x_time_train.reshape(-1, x_time_train.shape[-1])).reshape(x_time_train.shape)
x_params_val_s = scaler_params.transform(x_params_val)
x_schedule_val_s = scaler_schedule.transform(x_schedule_val)
y_curve_val_s = scaler_curve.transform(y_curve_val)
x_time_val = build_time_point_features(curve_time_val)
x_time_val_s = scaler_time.transform(x_time_val.reshape(-1, x_time_val.shape[-1])).reshape(x_time_val.shape)
x_params_test_s = scaler_params.transform(x_params_test)
x_schedule_test_s = scaler_schedule.transform(x_schedule_test)
y_curve_test_s = scaler_curve.transform(y_curve_test)
x_time_test = build_time_point_features(curve_time_test)
x_time_test_s = scaler_time.transform(x_time_test.reshape(-1, x_time_test.shape[-1])).reshape(x_time_test.shape)
output_path.parent.mkdir(parents=True, exist_ok=True)
@ -256,8 +337,8 @@ def preprocess_dataset(
"raw_param_dim": int(x_params.shape[1]),
"schedule_dim": int(x_schedule.shape[1]),
"curve_dim": curve_dim,
"time_feature_dim": int(x_time_train.shape[-1]),
"curve_time_source": curve_time_source,
"raw_curve_dim": raw_curve_dim,
"dropped_legacy_slope": bool(raw_curve_dim != curve_dim),
"input_h5": str(input_path),
"param_names": param_names,
"param_feature_transform": param_feature_transform,
@ -270,7 +351,6 @@ def preprocess_dataset(
"parts": [
{"name": "log_pressure", "start": 0, "end": int(n_time_points)},
{"name": "log_derivative", "start": int(n_time_points), "end": int(2 * n_time_points)},
{"name": "slope", "start": int(2 * n_time_points), "end": int(3 * n_time_points)},
],
},
}
@ -279,22 +359,15 @@ def preprocess_dataset(
"X_params_train": x_params_train_s.astype(np.float32),
"X_schedule_train": x_schedule_train_s.astype(np.float32),
"Y_curve_train": y_curve_train_s.astype(np.float32),
"T_curve_train": curve_time_train.astype(np.float32),
"X_time_train": x_time_train_s.astype(np.float32),
"X_params_val": x_params_val_s.astype(np.float32),
"X_schedule_val": x_schedule_val_s.astype(np.float32),
"Y_curve_val": y_curve_val_s.astype(np.float32),
"T_curve_val": curve_time_val.astype(np.float32),
"X_time_val": x_time_val_s.astype(np.float32),
"X_params_test": x_params_test_s.astype(np.float32),
"X_schedule_test": x_schedule_test_s.astype(np.float32),
"Y_curve_test": y_curve_test_s.astype(np.float32),
"T_curve_test": curve_time_test.astype(np.float32),
"X_time_test": x_time_test_s.astype(np.float32),
"scaler_params": scaler_params,
"scaler_schedule": scaler_schedule,
"scaler_curve": scaler_curve,
"scaler_time": scaler_time,
"meta": meta,
}
@ -337,8 +410,7 @@ def preprocess_dataset(
print(
f"train={len(idx_train)}, val={len(idx_val)}, test={len(idx_test)}, "
f"param_dim={x_params_features.shape[1]}, schedule_dim={x_schedule.shape[1]}, "
f"curve_dim={y_curve.shape[1]}, time_feature_dim={x_time_train.shape[-1]}, "
f"curve_time_source={curve_time_source}"
f"curve_dim={y_curve.shape[1]}"
)
if schedule_meta is not None:
print(f"schedule_meta_dim={schedule_meta.shape[1]}, family_name_saved={family_name is not None}")

@ -14,10 +14,11 @@
from __future__ import annotations
import os
import queue
import shutil
import struct
import subprocess
import time
import threading
from pathlib import Path
from typing import Any, Dict, Optional
@ -167,6 +168,74 @@ class CppRunner:
assert self.proc is not None and self.proc.stdin is not None and self.proc.stdout is not None
line = f"{params_path} {result_path}\n"
write_timeout = min(max(float(timeout) * 0.10, 1.0), 5.0)
def safe_write_once() -> bool:
results: queue.Queue[bool] = queue.Queue(maxsize=1)
proc_stdin = self.proc.stdin if self.proc is not None else None
def write_line() -> None:
ok = False
try:
assert proc_stdin is not None
proc_stdin.write(line)
proc_stdin.flush()
ok = True
except Exception:
ok = False
try:
results.put(ok)
except Exception:
pass
writer = threading.Thread(target=write_line, daemon=True)
writer.start()
writer.join(timeout=write_timeout)
if writer.is_alive():
self.close()
return False
try:
return bool(results.get_nowait())
except queue.Empty:
return False
if not safe_write_once():
self.close()
self._ensure_server_started()
assert self.proc is not None and self.proc.stdin is not None and self.proc.stdout is not None
if not safe_write_once():
self.close()
return False
responses_safe: queue.Queue[str] = queue.Queue(maxsize=1)
proc_stdout = self.proc.stdout if self.proc is not None else None
def read_response_safe() -> None:
try:
assert proc_stdout is not None
responses_safe.put(proc_stdout.readline())
except Exception:
try:
responses_safe.put("")
except Exception:
pass
reader_safe = threading.Thread(target=read_response_safe, daemon=True)
reader_safe.start()
reader_safe.join(timeout=max(float(timeout), 1.0))
if reader_safe.is_alive():
self.close()
return False
if self.proc is not None and self.proc.poll() is not None:
self.close()
return False
try:
resp_safe = responses_safe.get_nowait()
except queue.Empty:
resp_safe = ""
return bool(resp_safe.strip().startswith("OK"))
try:
self.proc.stdin.write(line)
self.proc.stdin.flush()
@ -178,25 +247,38 @@ class CppRunner:
self.proc.stdin.write(line)
self.proc.stdin.flush()
start = time.time()
while True:
if self.proc.poll() is not None:
self.close()
return False
responses: queue.Queue[str] = queue.Queue(maxsize=1)
def read_response() -> None:
try:
resp = self.proc.stdout.readline()
assert self.proc is not None and self.proc.stdout is not None
responses.put(self.proc.stdout.readline())
except Exception:
resp = ""
try:
responses.put("")
except Exception:
pass
if resp:
resp = resp.strip()
if resp.startswith("OK"):
return True
return False
reader = threading.Thread(target=read_response, daemon=True)
reader.start()
reader.join(timeout=max(float(timeout), 1.0))
if (time.time() - start) > timeout:
return False
if reader.is_alive():
self.close()
return False
if self.proc is not None and self.proc.poll() is not None:
self.close()
return False
try:
resp = responses.get_nowait()
except queue.Empty:
resp = ""
resp = resp.strip()
if resp.startswith("OK"):
return True
return False
def run_simulation(
self,
@ -227,7 +309,14 @@ class CppRunner:
return bool(ok and self.result_bin.exists())
# 非 server 模式保持兼容:每次模拟都启动 runner.exe并等待 result.bin 生成。
cmd = [str(self.runner_exe), str(self.dataset_bin), str(self.params_bin), str(self.result_bin)]
cmd = [
str(self.runner_exe),
str(self.dataset_bin),
str(self.params_bin),
str(self.result_bin),
str(self.runner_dll),
str(self.license_file),
]
result = subprocess.run(
cmd,
cwd=str(self.runner_exe.parent),

@ -1,49 +0,0 @@
# -*- coding: utf-8 -*-
"""时间条件模型的时间点特征。
正演代理模型一次性输出整条固定长度曲线而时间条件模型会把曲线拆成逐时间点
样本本模块为每个曲线时间点构造 log 时间相对时间和采样位置等特征使模型
能够在同一组物理参数和流量制度下预测任意时间点的压力/导数响应
"""
from __future__ import annotations
import numpy as np
def build_default_curve_time(n_samples: int, n_time_points: int) -> np.ndarray:
"""为没有保存真实曲线时间坐标的旧数据集构造默认对数时间网格。"""
# 旧数据只保存曲线值,默认用 1e-6 到 1 的几何网格近似原始双对数时间轴。
base = np.geomspace(1.0e-6, 1.0, int(n_time_points), dtype=np.float32)
return np.tile(base.reshape(1, -1), (int(n_samples), 1)).astype(np.float32)
def build_time_point_features(curve_time: np.ndarray) -> np.ndarray:
"""为每个曲线时间点构造 log10(t)、相对时间和归一化索引等时间条件特征。"""
t = np.asarray(curve_time, dtype=np.float32)
if t.ndim != 2:
raise ValueError(f"curve_time must be 2D [N, T], got shape={t.shape}")
eps = np.float32(1.0e-12)
# 避免 log10(0) 或除以 0时间比值以每条曲线末端时间归一化。
t_safe = np.maximum(t, eps)
t_end = np.maximum(t_safe[:, -1:], eps)
t_ratio = np.clip(t_safe / t_end, eps, None)
n_time = t.shape[1]
if n_time <= 1:
index = np.zeros((n_time,), dtype=np.float32)
else:
index = np.linspace(0.0, 1.0, n_time, dtype=np.float32)
index = np.tile(index.reshape(1, -1), (t.shape[0], 1))
# 四个通道同时保留绝对对数时间、相对对数时间、线性比例和采样位置。
return np.stack(
[
np.log10(t_safe),
np.log10(t_ratio),
t_ratio,
index,
],
axis=-1,
).astype(np.float32)

@ -14,7 +14,7 @@ import numpy as np
def split_curve_by_layout(curve: np.ndarray, layout: dict) -> dict[str, np.ndarray]:
"""按照 curve_layout 将拼接曲线拆成 log_pressure、log_derivative 和 slope 三段"""
"""按照 curve_layout 拆分曲线"""
parts: dict[str, np.ndarray] = {}
for part in layout["parts"]:
start = int(part["start"])

@ -2,7 +2,7 @@
"""正演代理模型网络结构。
ForwardSurrogate 输入标准化后的物理参数特征和可选的流量制度编码输出固定长度
拼接曲线log_pressurelog_derivative slope模型采用参数分支 + 流量制度
拼接曲线log_pressure log_derivative模型采用参数分支 + 流量制度
分支 + 融合主干 + 多输出头的结构便于分别学习静态地层信息动态制度信息以及
二者共同决定的曲线形态
@ -103,8 +103,8 @@ class ForwardSurrogate(nn.Module):
use_schedule=False 时该输入可为空
输出:
curve_pred: 形状 [B, curve_dim] log_pressurelog_derivativeslope
三段顺序拼接curve_dim 必须能被 3 整除以便每段拥有相同时间点数
curve_pred: 形状 [B, curve_dim] log_pressurelog_derivative 顺序拼接
新模型使用双通道布局旧版三通道 checkpoint 仍可加载用于兼容部署
"""
def __init__(
@ -119,7 +119,7 @@ class ForwardSurrogate(nn.Module):
dropout: float = 0.0,
use_schedule: bool = True,
):
"""构建参数分支、可选流量制度分支、融合主干和三组曲线输出头。"""
"""构建参数分支、可选流量制度分支、融合主干和曲线输出头。"""
super().__init__()
config = self._coerce_config(
config=config,
@ -175,8 +175,13 @@ class ForwardSurrogate(nn.Module):
@property
def part_dim(self) -> int:
"""压力、导数和 slope 每一段的时间点数量。"""
return self.config.curve_dim // 3
"""每一段曲线的时间点数量。"""
return self.config.curve_dim // self.n_curve_parts
@property
def n_curve_parts(self) -> int:
"""返回输出通道数480 等旧 checkpoint 保持三通道兼容。"""
return 3 if self.config.curve_dim % 3 == 0 else 2
@property
def use_schedule(self) -> bool:
@ -186,10 +191,10 @@ class ForwardSurrogate(nn.Module):
@staticmethod
def _validate_config(config: ForwardSurrogateConfig) -> None:
"""检查模型配置是否满足网络结构约束。"""
if config.curve_dim % 3 != 0:
if config.curve_dim % 2 != 0 and config.curve_dim % 3 != 0:
msg = (
f"curve_dim={config.curve_dim} 不能被 3 整除"
"期望为 pressure/derivative/slope 三段"
f"curve_dim={config.curve_dim} 无法识别"
"期望为 pressure/derivative 双通道或旧版三通道布局"
)
raise ValueError(msg)
@ -229,17 +234,17 @@ class ForwardSurrogate(nn.Module):
)
def _build_heads(self) -> nn.ModuleDict:
"""构建压力、导数和 slope 输出头。"""
"""构建压力和导数输出头;仅旧版三通道模型增加 slope 头。"""
trunk_out_dim = self.config.fusion_hidden_dims[-1]
return nn.ModuleDict(
{
"pressure_level": self._build_single_head(trunk_out_dim, 1),
"pressure_shape": self._build_single_head(trunk_out_dim, self.part_dim),
"derivative_level": self._build_single_head(trunk_out_dim, 1),
"derivative_shape": self._build_single_head(trunk_out_dim, self.part_dim),
"slope": self._build_single_head(trunk_out_dim, self.part_dim),
}
)
heads = {
"pressure_level": self._build_single_head(trunk_out_dim, 1),
"pressure_shape": self._build_single_head(trunk_out_dim, self.part_dim),
"derivative_level": self._build_single_head(trunk_out_dim, 1),
"derivative_shape": self._build_single_head(trunk_out_dim, self.part_dim),
}
if self.n_curve_parts == 3:
heads["slope"] = self._build_single_head(trunk_out_dim, self.part_dim)
return nn.ModuleDict(heads)
def _build_single_head(self, in_dim: int, out_dim: int) -> nn.Sequential:
"""构建一个曲线输出头。"""
@ -326,8 +331,8 @@ class ForwardSurrogate(nn.Module):
"""执行一次前向预测。
参数分支和流量制度分支先分别编码再在隐空间拼接融合主干提取共同特征后
压力和导数各自通过 level + centered shape 两个输出头生成slope 作为辅助通道
直接由单独输出头预测返回值仍保持预处理阶段约定的曲线拼接布局
压力和导数各自通过 level + centered shape 两个输出头生成旧版三通道模型
会继续生成 slope以便已有 checkpoint 保持可加载
"""
fused_feat = self._encode_features(params_x, schedule_x)
trunk_feat = self.trunk(fused_feat)
@ -342,6 +347,7 @@ class ForwardSurrogate(nn.Module):
"derivative_level",
"derivative_shape",
)
slope_pred = self.heads["slope"](trunk_feat)
return torch.cat([pressure_pred, derivative_pred, slope_pred], dim=1)
outputs = [pressure_pred, derivative_pred]
if "slope" in self.heads:
outputs.append(self.heads["slope"](trunk_feat))
return torch.cat(outputs, dim=1)

@ -1,193 +0,0 @@
# -*- coding: utf-8 -*-
"""时间条件代理模型网络结构。
TimeConditionedSurrogate 不一次性输出完整曲线而是把物理参数 + 流量制度 + 某个
时间点特征作为输入预测该时间点的 log_pressure log_derivative它适合用于
更灵活的时间采样局部曲线重建以及需要按时间点加权的训练策略
模型主体由参数编码器制度编码器时间编码器融合投影和若干残差块组成残差
块让较深的全连接网络更容易优化同时保留原始融合特征
"""
from __future__ import annotations
# pylint: disable=import-error,duplicate-code,too-many-arguments,too-many-positional-arguments
from dataclasses import dataclass
import torch
from torch import nn
@dataclass(frozen=True)
class TimeConditionedSurrogateConfig:
"""时间条件代理模型配置。"""
param_dim: int
schedule_dim: int
time_dim: int
hidden_dim: int = 256
n_blocks: int = 4
dropout: float = 0.05
use_schedule: bool = True
class ResidualBlock(nn.Module):
"""时间条件模型使用的全连接残差块,用于在较深网络中稳定传播特征。"""
def __init__(self, dim: int, dropout: float = 0.0):
"""构造两层全连接残差块,并根据 dropout 参数决定是否启用随机失活。"""
super().__init__()
self.net = nn.Sequential(
nn.LayerNorm(dim),
nn.Linear(dim, dim),
nn.GELU(),
nn.Dropout(dropout) if dropout > 0 else nn.Identity(),
nn.Linear(dim, dim),
)
self.act = nn.GELU()
def forward(self, x: torch.Tensor) -> torch.Tensor:
"""在保留原始隐藏表示的基础上叠加两层非线性修正。"""
# 残差连接让块学习修正量,而不是每层都重新表示完整特征。
return self.act(x + self.net(x))
class TimeConditionedSurrogate(nn.Module):
"""逐时间点预测的时间条件代理模型。
ForwardSurrogate 一次输出整条曲线不同该模型每次只预测一个时间点的
log_pressure log_derivative训练数据通常由 PointCurveDataset [N, T] 曲线
展开为 N*T 个样本因此 batch 中每一行都带有自己的 time_x
"""
def __init__(
self,
config: TimeConditionedSurrogateConfig | None = None,
*,
param_dim: int | None = None,
schedule_dim: int | None = None,
time_dim: int | None = None,
hidden_dim: int = 256,
n_blocks: int = 4,
dropout: float = 0.05,
use_schedule: bool = True,
):
"""按配置组装时间条件代理模型的编码、融合和输出层。"""
super().__init__()
config = self._coerce_config(
config=config,
param_dim=param_dim,
schedule_dim=schedule_dim,
time_dim=time_dim,
hidden_dim=hidden_dim,
n_blocks=n_blocks,
dropout=dropout,
use_schedule=use_schedule,
)
hidden_dim = int(config.hidden_dim)
time_hidden_dim = hidden_dim // 2
self.use_schedule = bool(config.use_schedule)
self.param_encoder = self._build_encoder(config.param_dim, hidden_dim)
self.time_encoder = self._build_encoder(config.time_dim, time_hidden_dim)
if self.use_schedule:
self.schedule_encoder = self._build_encoder(config.schedule_dim, hidden_dim)
fusion_dim = hidden_dim * 2 + time_hidden_dim
else:
self.schedule_encoder = None
fusion_dim = hidden_dim + time_hidden_dim
self.input_proj = nn.Sequential(
nn.Linear(fusion_dim, hidden_dim),
nn.GELU(),
)
self.blocks = nn.Sequential(
*[
ResidualBlock(hidden_dim, dropout=config.dropout)
for _ in range(int(config.n_blocks))
]
)
self.head = nn.Sequential(
nn.LayerNorm(hidden_dim),
nn.Linear(hidden_dim, time_hidden_dim),
nn.GELU(),
nn.Linear(time_hidden_dim, 2),
)
@staticmethod
def _coerce_config(
config: TimeConditionedSurrogateConfig | None,
*,
param_dim: int | None,
schedule_dim: int | None,
time_dim: int | None,
hidden_dim: int,
n_blocks: int,
dropout: float,
use_schedule: bool,
) -> TimeConditionedSurrogateConfig:
"""兼容配置对象式构造和旧版关键字参数式构造。"""
if config is not None:
return config
if param_dim is None or schedule_dim is None or time_dim is None:
raise TypeError(
"TimeConditionedSurrogate requires either a TimeConditionedSurrogateConfig "
"or param_dim, schedule_dim and time_dim keyword arguments"
)
return TimeConditionedSurrogateConfig(
param_dim=int(param_dim),
schedule_dim=int(schedule_dim),
time_dim=int(time_dim),
hidden_dim=int(hidden_dim),
n_blocks=int(n_blocks),
dropout=float(dropout),
use_schedule=bool(use_schedule),
)
@staticmethod
def _build_encoder(in_dim: int, out_dim: int) -> nn.Sequential:
"""构建 Linear-LayerNorm-GELU 编码器。"""
return nn.Sequential(
nn.Linear(in_dim, out_dim),
nn.LayerNorm(out_dim),
nn.GELU(),
)
def forward(
self,
params_x: torch.Tensor,
time_x: torch.Tensor,
schedule_x: torch.Tensor | None = None,
) -> torch.Tensor:
"""融合样本级特征和点级时间特征,输出双通道响应。
params_x schedule_x 在同一条曲线的所有时间点上相同time_x 则随时间点变化
三类特征编码后拼接进入残差主干最后输出 [B, 2]分别对应标准化空间中的
log_pressure log_derivative
"""
# params_x 和 schedule_x 是样本级特征time_x 是展开后的点级特征。
param_feat = self.param_encoder(params_x)
time_feat = self.time_encoder(time_x)
if self.use_schedule:
if schedule_x is None:
raise ValueError("use_schedule=True但 forward 没有传入 schedule_x")
schedule_feat = self.schedule_encoder(schedule_x)
fused = torch.cat([param_feat, schedule_feat, time_feat], dim=-1)
else:
fused = torch.cat([param_feat, time_feat], dim=-1)
# 融合后的特征经过残差主干,输出 log_pressure 和 log_derivative 两个通道。
hidden = self.input_proj(fused)
hidden = self.blocks(hidden)
return self.head(hidden)
def build_time_conditioned_surrogate(
config: TimeConditionedSurrogateConfig,
) -> TimeConditionedSurrogate:
"""根据配置创建时间条件代理模型。"""
return TimeConditionedSurrogate(config)

@ -3,7 +3,7 @@
本模块读取 preprocess.py 生成的 joblib 数据构造 PyTorch Dataset/DataLoader训练
ForwardSurrogate并按验证集损失保存最佳 checkpoint损失函数不是单一 MSE而是
由压力导数slope均值偏置导数形状约束和可选自动拟合目标组成目的是让
由压力导数均值偏置导数形状约束和可选自动拟合目标组成目的是让
模型既能拟合点值也能保持对自动试井拟合有意义的曲线形态
训练过程会保存 history.jsonmetrics.json forward_surrogate_best.pt后续评估
@ -34,7 +34,6 @@ METRIC_KEYS = (
"loss",
"loss_pressure",
"loss_derivative",
"loss_slope",
"loss_bias_pressure",
"loss_bias_derivative",
"loss_derivative_shape",
@ -88,7 +87,6 @@ class LossWeights:
pressure: float = 1.0
derivative: float = 2.0
slope: float = 0.0
bias_pressure: float = 0.15
bias_derivative: float = 0.05
derivative_shape: float = 0.10
@ -146,14 +144,12 @@ class CurveStats:
@dataclass
class LossBatchParts:
"""曲线三段的预测值和真实值。"""
"""压力和导数的预测值与真实值。"""
pred_p: torch.Tensor
pred_d: torch.Tensor
pred_s: torch.Tensor
true_p: torch.Tensor
true_d: torch.Tensor
true_s: torch.Tensor
@dataclass
@ -202,24 +198,29 @@ def load_processed_dataset(path: Path) -> dict:
def infer_curve_layout(data: dict) -> dict:
"""从元数据读取曲线分段布局;旧数据没有布局时按压力/导数/斜率三等分回退"""
"""读取双通道布局,并拒绝未经重新预处理的旧三通道数据"""
meta = data.get("meta", {})
curve_dim = int(meta.get("curve_dim", data["Y_curve_train"].shape[1]))
curve_layout = meta.get("curve_layout")
if curve_layout is not None:
names = {str(part["name"]) for part in curve_layout.get("parts", [])}
if "slope" in names:
raise ValueError(
"processed 数据仍包含旧版 slope 通道;请先重新运行 "
"scripts/preprocess_dataset.py预处理会自动裁掉 slope"
)
return curve_layout
if curve_dim % 3 != 0:
raise ValueError(f"curve_dim={curve_dim} 不能按 3 段均分,且 meta 中没有 curve_layout")
if curve_dim % 2 != 0:
raise ValueError(f"curve_dim={curve_dim} 不能按压力/导数两段均分")
n_time_points = curve_dim // 3
n_time_points = curve_dim // 2
return {
"n_time_points": int(n_time_points),
"parts": [
{"name": "log_pressure", "start": 0, "end": n_time_points},
{"name": "log_derivative", "start": n_time_points, "end": 2 * n_time_points},
{"name": "slope", "start": 2 * n_time_points, "end": 3 * n_time_points},
],
}
@ -309,14 +310,12 @@ def split_curve_parts(
target: torch.Tensor,
slices: dict[str, slice],
) -> LossBatchParts:
"""把拼接曲线拆成压力、导数和 slope 三段。"""
"""把拼接曲线拆成压力和导数两段。"""
return LossBatchParts(
pred_p=pred[:, slices["log_pressure"]],
pred_d=pred[:, slices["log_derivative"]],
pred_s=pred[:, slices["slope"]],
true_p=target[:, slices["log_pressure"]],
true_d=target[:, slices["log_derivative"]],
true_s=target[:, slices["slope"]],
)
@ -328,7 +327,6 @@ def compute_basic_loss_vectors(
return {
"loss_pressure": regression_per_sample(parts.pred_p, parts.true_p, loss_cfg),
"loss_derivative": regression_per_sample(parts.pred_d, parts.true_d, loss_cfg),
"loss_slope": mse_per_sample(parts.pred_s, parts.true_s),
"loss_bias_pressure": l1_per_sample(
parts.pred_p.mean(dim=1, keepdim=True),
parts.true_p.mean(dim=1, keepdim=True),
@ -378,7 +376,6 @@ def weighted_total_vector(
return (
weights.pressure * loss_vectors["loss_pressure"]
+ weights.derivative * loss_vectors["loss_derivative"]
+ weights.slope * loss_vectors["loss_slope"]
+ weights.bias_pressure * loss_vectors["loss_bias_pressure"]
+ weights.bias_derivative * loss_vectors["loss_bias_derivative"]
+ weights.derivative_shape * loss_vectors["loss_derivative_shape"]
@ -582,7 +579,7 @@ def print_training_config(cfg: TrainConfig, curve_layout: dict) -> None:
print(f" use_schedule={cfg.model.use_schedule}")
print(
f" weights: pressure={weights.pressure}, derivative={weights.derivative}, "
f"slope={weights.slope}, bias_p={weights.bias_pressure}, "
f"bias_p={weights.bias_pressure}, "
f"bias_d={weights.bias_derivative}, d_shape={weights.derivative_shape}, "
f"autofit_p={weights.autofit_pressure}, autofit_d={weights.autofit_derivative}"
)
@ -601,7 +598,6 @@ def format_metric_line(epoch: int, train_metrics: dict[str, float], val_metrics:
f"train={train_metrics['loss']:.6f} "
f"(p={train_metrics['loss_pressure']:.6f}, "
f"d={train_metrics['loss_derivative']:.6f}, "
f"s={train_metrics['loss_slope']:.6f}, "
f"bp={train_metrics['loss_bias_pressure']:.6f}, "
f"bd={train_metrics['loss_bias_derivative']:.6f}, "
f"ds={train_metrics['loss_derivative_shape']:.6f}, "
@ -612,7 +608,6 @@ def format_metric_line(epoch: int, train_metrics: dict[str, float], val_metrics:
f"val={val_metrics['loss']:.6f} "
f"(p={val_metrics['loss_pressure']:.6f}, "
f"d={val_metrics['loss_derivative']:.6f}, "
f"s={val_metrics['loss_slope']:.6f}, "
f"bp={val_metrics['loss_bias_pressure']:.6f}, "
f"bd={val_metrics['loss_bias_derivative']:.6f}, "
f"ds={val_metrics['loss_derivative_shape']:.6f}, "
@ -629,7 +624,6 @@ def format_final_line(test_metrics: dict[str, float]) -> str:
f"[Final] test={test_metrics['loss']:.6f} "
f"(p={test_metrics['loss_pressure']:.6f}, "
f"d={test_metrics['loss_derivative']:.6f}, "
f"s={test_metrics['loss_slope']:.6f}, "
f"bp={test_metrics['loss_bias_pressure']:.6f}, "
f"bd={test_metrics['loss_bias_derivative']:.6f}, "
f"ds={test_metrics['loss_derivative_shape']:.6f}, "

@ -1,552 +0,0 @@
# -*- coding: utf-8 -*-
"""时间条件代理模型训练流程。
本模块训练逐时间点预测模型每条曲线会被展开为 N*T 个点级样本每个样本包含
同一条曲线共享的物理参数/流量制度特征以及当前时间点的时间特征目标是该点的
log_pressure log_derivative
该训练方式可以对风险参数区域或特殊时间点引入更细粒度的加权策略也便于后续
在任意时间网格上重建曲线响应
"""
from __future__ import annotations
# pylint: disable=import-error,duplicate-code
import json
import random
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Any
import joblib
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from src.data.param_features import inverse_transform_param_features
from src.models.time_conditioned_surrogate import (
TimeConditionedSurrogate,
TimeConditionedSurrogateConfig,
)
from src.training.train_forward import get_part_slices, infer_curve_layout
@dataclass
class PointCurveArrays:
"""逐时间点数据集所需数组。"""
params_x: np.ndarray
schedule_x: np.ndarray
time_x: np.ndarray
curve_y: np.ndarray
layout: dict
sample_weight: np.ndarray | None = None
class PointCurveDataset(Dataset):
"""把完整曲线展开为逐时间点训练样本。"""
def __init__(self, arrays: PointCurveArrays):
"""保存逐点训练所需的参数、流量制度、时间特征、目标曲线和样本权重。"""
self.params_x = torch.tensor(arrays.params_x, dtype=torch.float32)
self.schedule_x = torch.tensor(arrays.schedule_x, dtype=torch.float32)
self.time_x = torch.tensor(arrays.time_x, dtype=torch.float32)
slices = get_part_slices(arrays.layout)
pressure_y = arrays.curve_y[:, slices["log_pressure"]]
derivative_y = arrays.curve_y[:, slices["log_derivative"]]
self.y = torch.tensor(np.stack([pressure_y, derivative_y], axis=-1), dtype=torch.float32)
self.n_samples = int(self.params_x.shape[0])
self.n_time = int(self.time_x.shape[1])
sample_weight = arrays.sample_weight
if sample_weight is None:
sample_weight = np.ones((self.n_samples,), dtype=np.float32)
sample_weight = np.asarray(sample_weight, dtype=np.float32).reshape(-1)
if sample_weight.shape[0] != self.n_samples:
raise ValueError(f"sample_weight length mismatch: {sample_weight.shape[0]} != {self.n_samples}")
self.sample_weight = torch.tensor(sample_weight, dtype=torch.float32)
def __len__(self) -> int:
"""返回数据集或容器中可迭代样本的数量。"""
return self.n_samples * self.n_time
def __getitem__(self, idx: int):
"""按索引取出一个训练样本或数据项。"""
sample_idx = idx // self.n_time
time_idx = idx % self.n_time
return (
self.params_x[sample_idx],
self.schedule_x[sample_idx],
self.time_x[sample_idx, time_idx],
self.y[sample_idx, time_idx],
self.sample_weight[sample_idx],
)
@dataclass
class TimeModelConfig:
"""时间条件代理模型结构配置。"""
hidden_dim: int = 256
n_blocks: int = 4
dropout: float = 0.05
use_schedule: bool = True
@dataclass
class TimeOptimConfig:
"""训练轮次和优化器配置。"""
batch_size: int = 4096
epochs: int = 120
lr: float = 1.0e-3
weight_decay: float = 1.0e-4
@dataclass
class TimeLossConfig:
"""时间条件模型点级损失配置。"""
w_pressure: float = 1.0
w_derivative: float = 2.0
huber_beta: float = 0.05
@dataclass
class RiskWeightConfig:
"""风险区域样本加权配置。"""
mode: str = "none"
risk_weight: float = 2.5
skin_lt_minus8_weight: float = 3.5
weight_min: float = 1.0
weight_max: float = 4.0
@dataclass
class TimeRuntimeConfig:
"""训练运行时配置。"""
seed: int = 42
device: str = "cuda" if torch.cuda.is_available() else "cpu"
@dataclass
class TimeConditionedTrainConfig:
"""时间条件代理模型训练配置。"""
processed_path: Path
output_dir: Path
runtime: TimeRuntimeConfig = field(default_factory=TimeRuntimeConfig)
optim: TimeOptimConfig = field(default_factory=TimeOptimConfig)
model: TimeModelConfig = field(default_factory=TimeModelConfig)
loss: TimeLossConfig = field(default_factory=TimeLossConfig)
risk_weight: RiskWeightConfig = field(default_factory=RiskWeightConfig)
@dataclass
class DataBundle:
"""训练、验证、测试数据加载器与输入维度。"""
train_loader: DataLoader
val_loader: DataLoader
test_loader: DataLoader
param_dim: int
schedule_dim: int
time_dim: int
@dataclass
class TrainArtifacts:
"""训练过程中需要跨函数传递的数据。"""
data: dict
curve_layout: dict
train_weight_summary: dict[str, Any]
bundle: DataBundle
def set_global_seed(seed: int) -> None:
"""设置 Python、NumPy 和 PyTorch 随机种子,并在 CUDA 可用时同步设置 GPU 随机种子。"""
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(seed)
def _smooth_l1_vector(pred: torch.Tensor, target: torch.Tensor, beta: float) -> torch.Tensor:
"""计算逐向量 Smooth L1 损失,保留样本维度用于加权。"""
return F.smooth_l1_loss(pred, target, beta=float(beta), reduction="none")
def _loss(
pred: torch.Tensor,
target: torch.Tensor,
loss_cfg: TimeLossConfig,
sample_weight: torch.Tensor | None = None,
) -> torch.Tensor:
"""计算时间条件模型的点级损失,并按样本权重求平均。"""
loss_p = _smooth_l1_vector(pred[:, 0], target[:, 0], beta=float(loss_cfg.huber_beta))
loss_d = _smooth_l1_vector(pred[:, 1], target[:, 1], beta=float(loss_cfg.huber_beta))
loss_vec = float(loss_cfg.w_pressure) * loss_p + float(loss_cfg.w_derivative) * loss_d
if sample_weight is None:
return loss_vec.mean()
weight = sample_weight.to(loss_vec.device).reshape(-1).clamp_min(0.0)
return (loss_vec * weight).sum() / torch.clamp(weight.sum(), min=1.0e-12)
def _move_batch_to_device(
batch: tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
device: str,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
"""把一个 batch 的所有张量移动到目标设备。"""
params_x, schedule_x, time_x, target_y, sample_weight = batch
return (
params_x.to(device),
schedule_x.to(device),
time_x.to(device),
target_y.to(device),
sample_weight.to(device),
)
def _forward_batch(
model: TimeConditionedSurrogate,
batch: tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
cfg: TimeConditionedTrainConfig,
use_weight: bool,
) -> tuple[torch.Tensor, int]:
"""完成一个 batch 的前向计算并返回损失和样本数。"""
params_x, schedule_x, time_x, target_y, sample_weight = _move_batch_to_device(
batch,
cfg.runtime.device,
)
schedule_input = schedule_x if cfg.model.use_schedule else None
pred = model(params_x, time_x, schedule_input)
weight = sample_weight if use_weight else None
loss = _loss(pred, target_y, cfg.loss, sample_weight=weight)
return loss, int(target_y.shape[0])
def _run_loader(
model: TimeConditionedSurrogate,
loader: DataLoader,
cfg: TimeConditionedTrainConfig,
optimizer: torch.optim.Optimizer | None = None,
) -> float:
"""执行一个训练或评估 epoch。"""
is_train = optimizer is not None
model.train(mode=is_train)
total = 0.0
total_n = 0
grad_context = torch.enable_grad() if is_train else torch.no_grad()
with grad_context:
for batch in loader:
if is_train:
optimizer.zero_grad()
loss, batch_size = _forward_batch(model, batch, cfg, use_weight=is_train)
if is_train:
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()
total += float(loss.detach().cpu()) * batch_size
total_n += batch_size
return total / max(total_n, 1)
def _evaluate(
model: TimeConditionedSurrogate,
loader: DataLoader,
cfg: TimeConditionedTrainConfig,
) -> float:
"""在验证集或测试集上评估时间条件模型的平均损失。"""
return _run_loader(model, loader, cfg, optimizer=None)
def _raw_params_from_processed_split(data: dict, split: str) -> dict[str, np.ndarray]:
"""从预处理数据中读取某个划分的原始参数,用于构造样本权重。"""
key = f"X_params_{split}"
features = data["scaler_params"].inverse_transform(data[key])
transform = data.get("meta", {}).get("param_feature_transform")
raw = inverse_transform_param_features(features, transform)
default_names = ["k", "skin", "wellboreC", "phi", "h", "Cf"]
names = list(data.get("meta", {}).get("param_names") or default_names)
return {
name: raw[:, idx].astype(np.float64)
for idx, name in enumerate(names[: raw.shape[1]])
}
def _build_sample_weight(
data: dict,
cfg: TimeConditionedTrainConfig,
split: str = "train",
) -> np.ndarray:
"""根据原始物理参数生成样本权重,使关键参数区域得到更多关注。"""
mode = str(cfg.risk_weight.mode or "none").lower()
n_samples = int(data[f"X_params_{split}"].shape[0])
if mode in {"none", "off", "false"}:
return np.ones((n_samples,), dtype=np.float32)
if mode != "risk_region":
raise ValueError(f"Unknown sample_weight_mode={cfg.risk_weight.mode!r}")
params = _raw_params_from_processed_split(data, split)
weight = np.ones((n_samples,), dtype=np.float32)
risk = (params["skin"] < -5.0) & (params["wellboreC"] > 0.1)
skin_extreme = params["skin"] < -8.0
weight[risk] = np.maximum(weight[risk], float(cfg.risk_weight.risk_weight))
weight[skin_extreme] = np.maximum(weight[skin_extreme], float(cfg.risk_weight.skin_lt_minus8_weight))
return np.clip(
weight,
float(cfg.risk_weight.weight_min),
float(cfg.risk_weight.weight_max),
).astype(np.float32)
def _summarize_sample_weight(sample_weight: np.ndarray) -> dict[str, Any]:
"""统计样本权重的最小值、最大值和分位数,便于检查加权强度。"""
weight = np.asarray(sample_weight, dtype=np.float32).reshape(-1)
return {
"min": float(np.min(weight)),
"mean": float(np.mean(weight)),
"median": float(np.median(weight)),
"max": float(np.max(weight)),
"n_weight_gt_1": int(np.sum(weight > 1.0)),
"n_weight_lt_1": int(np.sum(weight < 1.0)),
}
def _load_processed_data(path: Path) -> dict:
"""读取预处理数据并检查时间条件训练所需字段。"""
data = joblib.load(path)
required = ["X_time_train", "X_time_val", "X_time_test"]
missing = [key for key in required if key not in data]
if missing:
raise KeyError(f"processed dataset is missing time-conditioned fields: {missing}")
return data
def _make_point_dataset(
data: dict,
split: str,
curve_layout: dict,
sample_weight: np.ndarray | None = None,
) -> PointCurveDataset:
"""构造某个数据划分对应的逐点数据集。"""
return PointCurveDataset(
PointCurveArrays(
params_x=data[f"X_params_{split}"],
schedule_x=data[f"X_schedule_{split}"],
time_x=data[f"X_time_{split}"],
curve_y=data[f"Y_curve_{split}"],
layout=curve_layout,
sample_weight=sample_weight,
)
)
def _build_dataloaders(
data: dict,
curve_layout: dict,
train_weight: np.ndarray,
cfg: TimeConditionedTrainConfig,
) -> DataBundle:
"""构建训练、验证和测试 DataLoader。"""
train_ds = _make_point_dataset(data, "train", curve_layout, train_weight)
val_ds = _make_point_dataset(data, "val", curve_layout)
test_ds = _make_point_dataset(data, "test", curve_layout)
generator = torch.Generator()
generator.manual_seed(int(cfg.runtime.seed))
train_loader = DataLoader(
train_ds,
batch_size=cfg.optim.batch_size,
shuffle=True,
generator=generator,
)
val_loader = DataLoader(val_ds, batch_size=cfg.optim.batch_size, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=cfg.optim.batch_size, shuffle=False)
return DataBundle(
train_loader=train_loader,
val_loader=val_loader,
test_loader=test_loader,
param_dim=int(data["X_params_train"].shape[1]),
schedule_dim=int(data["X_schedule_train"].shape[1]),
time_dim=int(data["X_time_train"].shape[-1]),
)
def _prepare_training_artifacts(cfg: TimeConditionedTrainConfig) -> TrainArtifacts:
"""加载数据、推断布局、构造样本权重和 DataLoader。"""
data = _load_processed_data(cfg.processed_path)
curve_layout = infer_curve_layout(data)
train_weight = _build_sample_weight(data, cfg, split="train")
train_weight_summary = _summarize_sample_weight(train_weight)
bundle = _build_dataloaders(data, curve_layout, train_weight, cfg)
return TrainArtifacts(
data=data,
curve_layout=curve_layout,
train_weight_summary=train_weight_summary,
bundle=bundle,
)
def _build_model(bundle: DataBundle, cfg: TimeConditionedTrainConfig) -> TimeConditionedSurrogate:
"""兼容新版配置式模型和旧版关键字参数式模型。"""
model_cfg = TimeConditionedSurrogateConfig(
param_dim=bundle.param_dim,
schedule_dim=bundle.schedule_dim,
time_dim=bundle.time_dim,
hidden_dim=int(cfg.model.hidden_dim),
n_blocks=int(cfg.model.n_blocks),
dropout=float(cfg.model.dropout),
use_schedule=bool(cfg.model.use_schedule),
)
return TimeConditionedSurrogate(model_cfg).to(cfg.runtime.device)
def _build_optimizer(
model: TimeConditionedSurrogate,
cfg: TimeConditionedTrainConfig,
) -> torch.optim.Optimizer:
"""构建 AdamW 优化器。"""
return torch.optim.AdamW(
model.parameters(),
lr=float(cfg.optim.lr),
weight_decay=float(cfg.optim.weight_decay),
)
def _print_training_config(
cfg: TimeConditionedTrainConfig,
artifacts: TrainArtifacts,
) -> None:
"""打印时间条件训练配置摘要。"""
meta = artifacts.data.get("meta", {})
print("Time-conditioned training config:")
print(f" processed={cfg.processed_path}")
print(f" output_dir={cfg.output_dir}")
print(
f" device={cfg.runtime.device}, batch_size={cfg.optim.batch_size}, "
f"epochs={cfg.optim.epochs}"
)
print(
f" dims: param={artifacts.bundle.param_dim}, "
f"schedule={artifacts.bundle.schedule_dim}, time={artifacts.bundle.time_dim}"
)
print(f" curve_time_source={meta.get('curve_time_source', 'unknown')}")
print(
f" sample_weight_mode={cfg.risk_weight.mode}, "
f"sample_weight={artifacts.train_weight_summary}"
)
def _checkpoint_payload(
model: TimeConditionedSurrogate,
cfg: TimeConditionedTrainConfig,
artifacts: TrainArtifacts,
) -> dict[str, Any]:
"""构造 checkpoint 保存内容。"""
return {
"model_state_dict": model.state_dict(),
"param_dim": artifacts.bundle.param_dim,
"schedule_dim": artifacts.bundle.schedule_dim,
"time_dim": artifacts.bundle.time_dim,
"hidden_dim": int(cfg.model.hidden_dim),
"n_blocks": int(cfg.model.n_blocks),
"dropout": float(cfg.model.dropout),
"use_schedule": bool(cfg.model.use_schedule),
"curve_layout": artifacts.curve_layout,
"processed_path": str(cfg.processed_path),
"seed": int(cfg.runtime.seed),
"sample_weight_mode": str(cfg.risk_weight.mode),
"sample_weight_summary": artifacts.train_weight_summary,
"model_config": asdict(cfg.model),
"loss_config": asdict(cfg.loss),
"risk_weight_config": asdict(cfg.risk_weight),
}
def _save_json(path: Path, payload: dict | list) -> None:
"""写出 JSON 文件。"""
path.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")
def _train_epochs(
model: TimeConditionedSurrogate,
cfg: TimeConditionedTrainConfig,
artifacts: TrainArtifacts,
) -> tuple[float, Path, list[dict]]:
"""执行训练循环并保存最佳模型。"""
optimizer = _build_optimizer(model, cfg)
best_val = float("inf")
best_path = cfg.output_dir / "time_conditioned_surrogate_best.pt"
history: list[dict] = []
for epoch in range(1, int(cfg.optim.epochs) + 1):
train_loss = _run_loader(model, artifacts.bundle.train_loader, cfg, optimizer=optimizer)
val_loss = _evaluate(model, artifacts.bundle.val_loader, cfg)
history.append({"epoch": epoch, "train_loss": train_loss, "val_loss": val_loss})
print(f"[Epoch {epoch:03d}] train={train_loss:.6f} val={val_loss:.6f}")
if val_loss < best_val:
best_val = val_loss
torch.save(_checkpoint_payload(model, cfg, artifacts), best_path)
print(f" -> best model saved to: {best_path}")
return best_val, best_path, history
def _write_final_outputs(
cfg: TimeConditionedTrainConfig,
best_val: float,
test_loss: float,
history: list[dict],
artifacts: TrainArtifacts,
) -> None:
"""保存 history.json 和 metrics.json。"""
_save_json(cfg.output_dir / "history.json", history)
_save_json(
cfg.output_dir / "metrics.json",
{
"best_val_loss": best_val,
"test_loss": test_loss,
"sample_weight_mode": str(cfg.risk_weight.mode),
"sample_weight_summary": artifacts.train_weight_summary,
"model_config": asdict(cfg.model),
"loss_config": asdict(cfg.loss),
"risk_weight_config": asdict(cfg.risk_weight),
},
)
def train_time_conditioned(cfg: TimeConditionedTrainConfig) -> None:
"""训练时间条件代理模型并保存训练产物。"""
cfg.output_dir.mkdir(parents=True, exist_ok=True)
set_global_seed(int(cfg.runtime.seed))
artifacts = _prepare_training_artifacts(cfg)
model = _build_model(artifacts.bundle, cfg)
_print_training_config(cfg, artifacts)
best_val, best_path, history = _train_epochs(model, cfg, artifacts)
checkpoint = torch.load(best_path, map_location=cfg.runtime.device)
model.load_state_dict(checkpoint["model_state_dict"])
test_loss = _evaluate(model, artifacts.bundle.test_loader, cfg)
_write_final_outputs(cfg, best_val, test_loss, history, artifacts)
print(f"[Final] test={test_loss:.6f}")

@ -1223,7 +1223,7 @@ QString nmCalculationAutoFitPSO::getSurrogateTag() const
{
// 当前 C++ 绑定的代理模型实验标识,对应 ML 侧训练输出目录/checkpoint 命名。
// 如果以后换模型,需要同步更新这里和 ML 脚本可识别的 tag。
return "family_random_v2_q_50k_logparam";
return "family_random_v2_q_50k_noslope";
}
QString nmCalculationAutoFitPSO::getSurrogateStage() const
@ -6001,4 +6001,4 @@ void nmCalculationAutoFitPSO::finishSimulation()
QString message = QString(tr("PSO optimization converged to stable solution. Best error: %1, Iterations: %2"))
.arg(m_globalBestFitness, 0, 'e', 4).arg(m_simulationMaxIterations);
emit fittingFinished(true, message);
}
}

Loading…
Cancel
Save