Source code for ddmtolab.Problems.RWO.nn_training

"""Neural Network Weight Training (NN-Training) benchmark problems.

This module provides real-world single-task optimization problems where the
decision variables are the **neural network weights** themselves. The optimization
algorithm (e.g., DE, PSO, GA) directly searches for optimal weight configurations
to minimize classification error or regression loss -- no gradient-based training.

Data is split into train/test sets (70/30 by default). The optimization objective
is the **test set error**: classification error rate or regression MSE.

Datasets used (from scikit-learn):
- Classification: Digits (10 classes), Covertype (7 classes)
- Regression: Diabetes, California Housing

Network architecture: fixed single-hidden-layer MLP with ReLU activation.
Decision variables: flattened weight vector [W1, b1, W2, b2].
"""

import numpy as np
from sklearn.datasets import (
    load_digits, load_diabetes,
    fetch_california_housing, fetch_covtype
)
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from ddmtolab.Methods.mtop import MTOP


# ---------------------------------------------------------------------------
# Fixed-architecture neural network (numpy-only, no PyTorch needed)
# ---------------------------------------------------------------------------

class _FixedNN:
    """
    Fixed-architecture feedforward neural network parameterized by a flat
    weight vector. Uses ReLU activation on hidden layers and linear output.

    Parameters
    ----------
    layer_sizes : list of int
        Sizes of each layer, e.g. [4, 10, 3] means input=4, hidden=10, output=3.

    Attributes
    ----------
    n_params : int
        Total number of trainable parameters (weights + biases).
    """

    def __init__(self, layer_sizes):
        self.layer_sizes = layer_sizes
        self.n_params = sum(
            layer_sizes[i] * layer_sizes[i + 1] + layer_sizes[i + 1]
            for i in range(len(layer_sizes) - 1)
        )

    def forward(self, X, params):
        """
        Forward pass with given flattened parameters.

        Parameters
        ----------
        X : np.ndarray, shape (n_samples, input_dim)
            Input features.
        params : np.ndarray, shape (n_params,)
            Flattened weight vector.

        Returns
        -------
        np.ndarray, shape (n_samples, output_dim)
            Network output (logits for classification, raw values for regression).
        """
        offset = 0
        h = X
        for i in range(len(self.layer_sizes) - 1):
            in_d = self.layer_sizes[i]
            out_d = self.layer_sizes[i + 1]

            W = params[offset:offset + in_d * out_d].reshape(in_d, out_d)
            offset += in_d * out_d
            b = params[offset:offset + out_d]
            offset += out_d

            h = h @ W + b

            # ReLU on all hidden layers (not output)
            if i < len(self.layer_sizes) - 2:
                h = np.maximum(h, 0)

        return h


# ---------------------------------------------------------------------------
# Data loading utilities
# ---------------------------------------------------------------------------

def _load_cls_data(loader_func, max_samples=None, test_ratio=0.3, seed=42):
    """
    Load, standardize, and split a classification dataset.

    Returns
    -------
    tuple
        (X_train, y_train, X_test, y_test, n_classes).
    """
    data = loader_func()
    X = data.data.astype(np.float64)
    y = data.target.astype(np.int64)

    if max_samples and X.shape[0] > max_samples:
        rng = np.random.RandomState(seed)
        idx = rng.choice(X.shape[0], max_samples, replace=False)
        X, y = X[idx], y[idx]

    # Remap labels to 0..n_classes-1 (covtype labels start from 1)
    unique_labels = np.sort(np.unique(y))
    if unique_labels[0] != 0 or not np.array_equal(unique_labels, np.arange(len(unique_labels))):
        label_map = {old: new for new, old in enumerate(unique_labels)}
        y = np.array([label_map[yi] for yi in y])

    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    n_classes = len(np.unique(y))

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, random_state=seed, stratify=y
    )
    return X_train, y_train, X_test, y_test, n_classes


def _load_reg_data(loader_func, max_samples=None, test_ratio=0.3, seed=42):
    """
    Load, standardize, and split a regression dataset.

    Returns
    -------
    tuple
        (X_train, y_train, X_test, y_test).
    """
    data = loader_func()
    X = data.data.astype(np.float64)
    y = data.target.astype(np.float64)

    if max_samples and X.shape[0] > max_samples:
        rng = np.random.RandomState(seed)
        idx = rng.choice(X.shape[0], max_samples, replace=False)
        X, y = X[idx], y[idx]

    scaler_X = StandardScaler()
    X = scaler_X.fit_transform(X)

    scaler_y = StandardScaler()
    y = scaler_y.fit_transform(y.reshape(-1, 1)).flatten()

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, random_state=seed
    )
    return X_train, y_train, X_test, y_test


# ---------------------------------------------------------------------------
# NN_Training benchmark class
# ---------------------------------------------------------------------------

[docs] class NN_Training: """ Neural Network Weight Training benchmark suite for single-task optimization. The decision variables are the **flattened weights and biases** of a fixed-architecture MLP. The optimization algorithm directly searches for optimal weight configurations -- evaluation is a single forward pass (no gradient-based training), making these problems fast to evaluate. Data is split into train / test. The optimization objective is the **test set error rate** (classification) or **test MSE** (regression). Problems are ordered from easy to hard (by dimension & difficulty): +-----+-----------------------+---------------+--------+-----------------+ | P | Dataset | Architecture | Dim | Task type | +=====+=======================+===============+========+=================+ | P1 | California Housing | [8, 10, 1] | 101 | Regression | | P2 | Diabetes | [10, 10, 1] | 121 | Regression | | P3 | Digits | [64, 10, 10] | 760 | Classification | | P4 | Covertype | [54, 20, 7] | 1247 | Classification | | P5 | Digits (large net) | [64, 20, 10] | 1510 | Classification | | P6 | Covertype (large net) | [54, 30, 7] | 1867 | Classification | +-----+-----------------------+---------------+--------+-----------------+ Objectives (minimize): - Classification: test error rate (1 - accuracy), range [0, 1] - Regression: test MSE on standardized targets Bounds: [-3, 3] for all weight parameters. Parameters ---------- test_ratio : float, optional Fraction of data for testing (default 0.3). seed : int, optional Random seed for train/test split (default 42). """ problem_information = { 'n_cases': 6, 'n_tasks': '1', 'n_dims': '[101, 1867]', 'n_objs': '1', 'n_cons': '0', 'type': 'real_world', } _WEIGHT_BOUND = 3.0 def __init__(self, test_ratio=0.3, seed=42): self.test_ratio = test_ratio self.seed = seed self._cache = {} # ----- Internal helpers ----- def _get_cls(self, name, loader_func, arch, max_samples=None): """Load (or retrieve cached) classification dataset and build NN.""" if name not in self._cache: X_tr, y_tr, X_te, y_te, n_cls = _load_cls_data( loader_func, max_samples, self.test_ratio, self.seed ) nn = _FixedNN(arch) self._cache[name] = ('cls', X_tr, y_tr, X_te, y_te, n_cls, nn) return self._cache[name] def _get_reg(self, name, loader_func, arch, max_samples=None): """Load (or retrieve cached) regression dataset and build NN.""" if name not in self._cache: X_tr, y_tr, X_te, y_te = _load_reg_data( loader_func, max_samples, self.test_ratio, self.seed ) nn = _FixedNN(arch) self._cache[name] = ('reg', X_tr, y_tr, X_te, y_te, nn) return self._cache[name] def _cls_problem(self, name, loader_func, arch, max_samples=None): """Create a single-task classification MTOP (test error objective).""" _, X_tr, y_tr, X_te, y_te, n_cls, nn = self._get_cls( name, loader_func, arch, max_samples ) dim = nn.n_params def objective(x): x = np.atleast_2d(x) results = np.zeros((x.shape[0], 1)) for i in range(x.shape[0]): logits = nn.forward(X_te, x[i]) preds = np.argmax(logits, axis=1) results[i, 0] = 1.0 - np.mean(preds == y_te) return results problem = MTOP() problem.add_task( objective, dim=dim, lower_bound=-self._WEIGHT_BOUND, upper_bound=self._WEIGHT_BOUND ) return problem def _reg_problem(self, name, loader_func, arch, max_samples=None): """Create a single-task regression MTOP (test MSE objective).""" _, X_tr, y_tr, X_te, y_te, nn = self._get_reg( name, loader_func, arch, max_samples ) dim = nn.n_params def objective(x): x = np.atleast_2d(x) results = np.zeros((x.shape[0], 1)) for i in range(x.shape[0]): preds = nn.forward(X_te, x[i]).flatten() results[i, 0] = np.mean((preds - y_te) ** 2) return results problem = MTOP() problem.add_task( objective, dim=dim, lower_bound=-self._WEIGHT_BOUND, upper_bound=self._WEIGHT_BOUND ) return problem # ----- Single-task problems (easy → hard) -----
[docs] def P1(self) -> MTOP: """ Problem 1: **California Housing** regression. Architecture: [8, 10, 1], 101-D. 5000 samples (subsampled). Objective: test MSE, minimize. """ return self._reg_problem( 'california', fetch_california_housing, [8, 10, 1], max_samples=5000 )
[docs] def P2(self) -> MTOP: """ Problem 2: **Diabetes** regression. Architecture: [10, 10, 1], 121-D. 442 samples. Objective: test MSE, minimize. """ return self._reg_problem( 'diabetes', load_diabetes, [10, 10, 1] )
[docs] def P3(self) -> MTOP: """ Problem 3: **Digits** classification (small net). Architecture: [64, 10, 10], 760-D. 1797 samples, 10 classes. Objective: test error rate, minimize. """ return self._cls_problem( 'digits_s', load_digits, [64, 10, 10] )
[docs] def P4(self) -> MTOP: """ Problem 4: **Covertype** classification (medium net). Architecture: [54, 20, 7], 1247-D. 5000 samples (subsampled), 7 classes. Objective: test error rate, minimize. """ return self._cls_problem( 'covtype_m', fetch_covtype, [54, 20, 7], max_samples=5000 )
[docs] def P5(self) -> MTOP: """ Problem 5: **Digits** classification (large net). Architecture: [64, 20, 10], 1510-D. 1797 samples, 10 classes. Objective: test error rate, minimize. """ return self._cls_problem( 'digits_l', load_digits, [64, 20, 10] )
[docs] def P6(self) -> MTOP: """ Problem 6: **Covertype** classification (large net). Architecture: [54, 30, 7], 1857-D. 5000 samples (subsampled), 7 classes. Objective: test error rate, minimize. """ return self._cls_problem( 'covtype_l', fetch_covtype, [54, 30, 7], max_samples=5000 )