"""Command-line entry point that preprocesses an HDF5 dataset.

Parses the command-line options and forwards them to
``src.data.preprocess.preprocess_dataset``.
"""

from __future__ import annotations

import argparse
import sys
from pathlib import Path

# Directory one level above this script's folder; it is appended to
# ``sys.path`` so the ``src`` package imports below resolve when the script
# is run directly (presumably this is the repository root -- confirm).
ROOT = Path(__file__).resolve().parents[1]
sys.path.append(str(ROOT))

from src.common.experiment_paths import normalize_tag, processed_path_for_tag
from src.data.preprocess import preprocess_dataset


def _build_parser() -> argparse.ArgumentParser:
    """Return the argument parser describing this script's options."""
    parser = argparse.ArgumentParser(
        description="Preprocess HDF5 dataset for forward surrogate"
    )
    parser.add_argument(
        "--input",
        type=str,
        required=True,
        help="Path to the generated .h5 dataset",
    )
    parser.add_argument(
        "--output",
        type=str,
        default=None,
        help="Optional output .pkl path",
    )
    parser.add_argument(
        "--tag",
        type=str,
        default=None,
        help="Experiment tag for auto naming",
    )
    parser.add_argument("--test-size", type=float, default=0.15)
    parser.add_argument("--val-size", type=float, default=0.15)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument(
        "--no-param-feature-transform",
        action="store_true",
        help="Keep raw physical parameters before StandardScaler; default uses log/asinh features",
    )
    return parser


def main(argv: list[str] | None = None) -> None:
    """Parse the options and run ``preprocess_dataset``.

    Args:
        argv: Argument strings to parse instead of the process command line.
            ``None`` (the default) makes ``argparse`` read ``sys.argv[1:]``,
            which is the behaviour when the script is run directly.
    """
    args = _build_parser().parse_args(argv)

    # The tag is always normalized, even when --output is given explicitly,
    # so that normalize_tag is invoked on every run as before.
    tag = normalize_tag(args.tag)
    if args.output is not None:
        output_path = Path(args.output)
    else:
        # No explicit output: let the project helper pick a path from the tag.
        output_path = processed_path_for_tag(tag)

    preprocess_dataset(
        input_path=Path(args.input),
        output_path=output_path,
        test_size=args.test_size,
        val_size=args.val_size,
        random_seed=args.seed,
        # The CLI flag is a negative switch; the callee takes the positive form.
        use_param_feature_transform=not args.no_param_feature_transform,
    )


if __name__ == "__main__":
    main()