Source code for ddmtolab.Problems.RWO.nn_training

"""Neural Network Weight Training (NN-Training) benchmark problems.

This module provides real-world single-task optimization problems where the
decision variables are the **neural network weights** themselves. The optimization
algorithm (e.g., DE, PSO, GA) directly searches for optimal weight configurations
to minimize classification error or regression loss -- no gradient-based training.

Data is split into train/test sets (70/30 by default). The optimization objective
is the **test set error**: classification error rate or regression MSE.

Datasets used (from scikit-learn):
- Classification: Digits (10 classes), Covertype (7 classes)
- Regression: Diabetes, California Housing

Network architecture: fixed single-hidden-layer MLP with ReLU activation.
Decision variables: flattened weight vector [W1, b1, W2, b2].
"""

import numpy as np
from sklearn.datasets import (
    load_digits, load_diabetes,
    fetch_california_housing, fetch_covtype
)
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from ddmtolab.Methods.mtop import MTOP


# ---------------------------------------------------------------------------
# Fixed-architecture neural network (numpy-only, no PyTorch needed)
# ---------------------------------------------------------------------------

class _FixedNN:
    """
    Fixed-architecture feedforward neural network parameterized by a flat
    weight vector. Uses ReLU activation on hidden layers and linear output.

    Parameters
    ----------
    layer_sizes : list of int
        Sizes of each layer, e.g. [4, 10, 3] means input=4, hidden=10, output=3.

    Attributes
    ----------
    layer_sizes : list of int
        The layer sizes passed to the constructor.
    n_params : int
        Total number of trainable parameters (weights + biases).
    """

    def __init__(self, layer_sizes):
        self.layer_sizes = layer_sizes
        # Every pair of consecutive layers contributes an (in x out) weight
        # matrix plus one bias per output unit.
        self.n_params = sum(
            n_in * n_out + n_out
            for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])
        )

    def forward(self, X, params):
        """
        Forward pass with given flattened parameters.

        The vector is consumed layer by layer in the order
        ``[W1, b1, W2, b2, ...]``; each ``W`` is read row-major with shape
        ``(in_dim, out_dim)``.

        Parameters
        ----------
        X : np.ndarray, shape (n_samples, input_dim)
            Input features.
        params : array-like, shape (n_params,)
            Flattened weight vector. Entries beyond ``n_params`` are ignored.

        Returns
        -------
        np.ndarray, shape (n_samples, output_dim)
            Network output (logits for classification, raw values for regression).

        Raises
        ------
        ValueError
            If ``params`` holds fewer than ``n_params`` values.
        """
        # Accept lists/tuples as well as arrays (slices need ``.reshape``).
        params = np.asarray(params)

        # Guard against short vectors: without this check a truncated final
        # bias slice of length 1 would silently broadcast over all outputs.
        if params.size < self.n_params:
            raise ValueError(
                f"expected at least {self.n_params} parameters, "
                f"got {params.size}"
            )

        last_layer = len(self.layer_sizes) - 2
        offset = 0
        h = X
        layer_dims = zip(self.layer_sizes[:-1], self.layer_sizes[1:])
        for i, (in_d, out_d) in enumerate(layer_dims):
            n_weights = in_d * out_d
            W = params[offset:offset + n_weights].reshape(in_d, out_d)
            offset += n_weights
            b = params[offset:offset + out_d]
            offset += out_d

            h = h @ W + b

            # ReLU on all hidden layers (not output)
            if i < last_layer:
                h = np.maximum(h, 0)

        return h


# ---------------------------------------------------------------------------
# Data loading utilities
# ---------------------------------------------------------------------------

def _load_cls_data(loader_func, max_samples=None, test_ratio=0.3, seed=42):
    """
    Fetch a classification dataset, optionally subsample it, relabel the
    classes as 0..n_classes-1, standardize the features and split it.

    Parameters
    ----------
    loader_func : callable
        Zero-argument loader returning an object with ``data`` and ``target``.
    max_samples : int or None, optional
        If truthy and the dataset has more rows, a random subset of this size
        is drawn without replacement using ``seed``.
    test_ratio : float, optional
        Fraction of rows assigned to the test split.
    seed : int, optional
        Seed for both the subsampling and the stratified split.

    Returns
    -------
    tuple
        (X_train, y_train, X_test, y_test, n_classes).
    """
    bunch = loader_func()
    features = bunch.data.astype(np.float64)
    labels = bunch.target.astype(np.int64)

    n_rows = features.shape[0]
    if max_samples and n_rows > max_samples:
        keep = np.random.RandomState(seed).choice(
            n_rows, max_samples, replace=False
        )
        features = features[keep]
        labels = labels[keep]

    # Labels must be contiguous and zero-based (covtype labels start from 1).
    classes = np.unique(labels)  # np.unique already returns sorted values
    zero_based = classes[0] == 0 and np.array_equal(
        classes, np.arange(len(classes))
    )
    if not zero_based:
        to_index = {old: new for new, old in enumerate(classes)}
        labels = np.array([to_index[value] for value in labels])

    # Note: the scaler is fitted on all rows, i.e. before the split.
    features = StandardScaler().fit_transform(features)
    n_classes = len(np.unique(labels))

    X_train, X_test, y_train, y_test = train_test_split(
        features,
        labels,
        test_size=test_ratio,
        random_state=seed,
        stratify=labels,
    )
    return X_train, y_train, X_test, y_test, n_classes


def _load_reg_data(loader_func, max_samples=None, test_ratio=0.3, seed=42):
    """
    Fetch a regression dataset, optionally subsample it, standardize both
    features and targets, and split it into train and test parts.

    Parameters
    ----------
    loader_func : callable
        Zero-argument loader returning an object with ``data`` and ``target``.
    max_samples : int or None, optional
        If truthy and the dataset has more rows, a random subset of this size
        is drawn without replacement using ``seed``.
    test_ratio : float, optional
        Fraction of rows assigned to the test split.
    seed : int, optional
        Seed for both the subsampling and the split.

    Returns
    -------
    tuple
        (X_train, y_train, X_test, y_test).
    """
    bunch = loader_func()
    features = bunch.data.astype(np.float64)
    targets = bunch.target.astype(np.float64)

    n_rows = features.shape[0]
    if max_samples and n_rows > max_samples:
        keep = np.random.RandomState(seed).choice(
            n_rows, max_samples, replace=False
        )
        features = features[keep]
        targets = targets[keep]

    # Note: both scalers are fitted on all rows, i.e. before the split.
    features = StandardScaler().fit_transform(features)
    # StandardScaler needs a 2-D column; squeeze back to 1-D afterwards.
    targets = StandardScaler().fit_transform(targets[:, np.newaxis]).ravel()

    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=test_ratio, random_state=seed
    )
    return X_train, y_train, X_test, y_test


# ---------------------------------------------------------------------------
# NN_Training benchmark class
# ---------------------------------------------------------------------------


[docs]
class NN_Training:
    """
    Neural Network Weight Training benchmark suite for single-task optimization.

    The decision variables are the **flattened weights and biases** of a
    fixed-architecture MLP. The optimization algorithm directly searches for
    optimal weight configurations -- evaluation is a single forward pass (no
    gradient-based training), making these problems fast to evaluate.

    Data is split into train / test. The optimization objective is the
    **test set error rate** (classification) or **test MSE** (regression).
    The training split is loaded and cached alongside the test split, but the
    objectives defined in this class evaluate the test split only.

    Problems are ordered from easy to hard (by dimension & difficulty):

    +-----+-----------------------+---------------+--------+-----------------+
    | P   | Dataset               | Architecture  | Dim    | Task type       |
    +=====+=======================+===============+========+=================+
    | P1  | California Housing    | [8, 10, 1]    | 101    | Regression      |
    +-----+-----------------------+---------------+--------+-----------------+
    | P2  | Diabetes              | [10, 10, 1]   | 121    | Regression      |
    +-----+-----------------------+---------------+--------+-----------------+
    | P3  | Digits                | [64, 10, 10]  | 760    | Classification  |
    +-----+-----------------------+---------------+--------+-----------------+
    | P4  | Covertype             | [54, 20, 7]   | 1247   | Classification  |
    +-----+-----------------------+---------------+--------+-----------------+
    | P5  | Digits (large net)    | [64, 20, 10]  | 1510   | Classification  |
    +-----+-----------------------+---------------+--------+-----------------+
    | P6  | Covertype (large net) | [54, 30, 7]   | 1867   | Classification  |
    +-----+-----------------------+---------------+--------+-----------------+

    Objectives (minimize):

    - Classification: test error rate (1 - accuracy), range [0, 1]
    - Regression: test MSE on standardized targets

    Bounds: [-3, 3] for all weight parameters.

    Parameters
    ----------
    test_ratio : float, optional
        Fraction of data for testing (default 0.3).
    seed : int, optional
        Random seed for train/test split (default 42). The same seed is also
        passed to the loaders' subsampling step.
    """

    # Static description of the suite: six single-task, single-objective,
    # unconstrained problems whose dimension ranges from 101 to 1867.
    # NOTE(review): presumably read by the surrounding framework -- it is not
    # used anywhere in this file; confirm the expected value types there.
    problem_information = {
        'n_cases': 6,
        'n_tasks': '1',
        'n_dims': '[101, 1867]',
        'n_objs': '1',
        'n_cons': '0',
        'type': 'real_world',
    }

    # Symmetric box bound: every weight/bias is searched in
    # [-_WEIGHT_BOUND, +_WEIGHT_BOUND].
    _WEIGHT_BOUND = 3.0

    def __init__(self, test_ratio=0.3, seed=42):
        """
        Remember the split settings and start with an empty dataset cache.

        Parameters
        ----------
        test_ratio : float, optional
            Fraction of data for testing (default 0.3).
        seed : int, optional
            Random seed forwarded to the data loaders (default 42).
        """
        # Maps a problem name to its loaded splits and network.
        self._cache = {}
        self.seed = seed
        self.test_ratio = test_ratio

    # ----- Internal helpers -----

    def _get_cls(self, name, loader_func, arch, max_samples=None):
        """
        Return the cached classification entry for ``name``, building it on
        first use.

        The entry is the tuple
        ``('cls', X_train, y_train, X_test, y_test, n_classes, nn)``.
        """
        if name in self._cache:
            return self._cache[name]

        # Data is loaded first, then the network for ``arch`` is created.
        entry = (
            'cls',
            *_load_cls_data(
                loader_func, max_samples, self.test_ratio, self.seed
            ),
            _FixedNN(arch),
        )
        self._cache[name] = entry
        return entry

    def _get_reg(self, name, loader_func, arch, max_samples=None):
        """
        Return the cached regression entry for ``name``, building it on
        first use.

        The entry is the tuple
        ``('reg', X_train, y_train, X_test, y_test, nn)``.
        """
        if name in self._cache:
            return self._cache[name]

        # Data is loaded first, then the network for ``arch`` is created.
        entry = (
            'reg',
            *_load_reg_data(
                loader_func, max_samples, self.test_ratio, self.seed
            ),
            _FixedNN(arch),
        )
        self._cache[name] = entry
        return entry

    def _cls_problem(self, name, loader_func, arch, max_samples=None):
        """
        Build a single-task classification MTOP whose objective is the
        test-set error rate of the network defined by ``arch``.

        Only the test split and the network are taken from the cached entry;
        the training split is not used by the objective.
        """
        entry = self._get_cls(name, loader_func, arch, max_samples)
        X_te, y_te, nn = entry[3], entry[4], entry[6]

        def objective(x):
            """Return the test error rate of each candidate, shape (n, 1)."""
            candidates = np.atleast_2d(x)
            errors = np.zeros((candidates.shape[0], 1))
            for row, weights in enumerate(candidates):
                predicted = nn.forward(X_te, weights).argmax(axis=1)
                errors[row, 0] = 1.0 - np.mean(predicted == y_te)
            return errors

        bound = self._WEIGHT_BOUND
        problem = MTOP()
        problem.add_task(
            objective,
            dim=nn.n_params,
            lower_bound=-bound,
            upper_bound=bound,
        )
        return problem

    def _reg_problem(self, name, loader_func, arch, max_samples=None):
        """
        Build a single-task regression MTOP whose objective is the test-set
        mean squared error of the network defined by ``arch``.

        Only the test split and the network are taken from the cached entry;
        the training split is not used by the objective.
        """
        entry = self._get_reg(name, loader_func, arch, max_samples)
        X_te, y_te, nn = entry[3], entry[4], entry[5]

        def objective(x):
            """Return the test MSE of each candidate, shape (n, 1)."""
            candidates = np.atleast_2d(x)
            losses = np.zeros((candidates.shape[0], 1))
            for row, weights in enumerate(candidates):
                residual = nn.forward(X_te, weights).ravel() - y_te
                losses[row, 0] = np.mean(residual ** 2)
            return losses

        bound = self._WEIGHT_BOUND
        problem = MTOP()
        problem.add_task(
            objective,
            dim=nn.n_params,
            lower_bound=-bound,
            upper_bound=bound,
        )
        return problem

    # ----- Single-task problems (easy → hard) -----


[docs]
    def P1(self) -> MTOP:
        """
        Problem 1: **California Housing** regression.

        Network [8, 10, 1] (101 decision variables), evaluated on a
        5000-sample subsample of the dataset.
        Objective: test MSE, minimize.
        """
        architecture = [8, 10, 1]
        return self._reg_problem(
            name='california',
            loader_func=fetch_california_housing,
            arch=architecture,
            max_samples=5000,
        )



[docs]
    def P2(self) -> MTOP:
        """
        Problem 2: **Diabetes** regression.

        Network [10, 10, 1] (121 decision variables), 442 samples,
        no subsampling.
        Objective: test MSE, minimize.
        """
        architecture = [10, 10, 1]
        return self._reg_problem(
            name='diabetes',
            loader_func=load_diabetes,
            arch=architecture,
        )



[docs]
    def P3(self) -> MTOP:
        """
        Problem 3: **Digits** classification (small net).

        Network [64, 10, 10] (760 decision variables), 1797 samples,
        10 classes.
        Objective: test error rate, minimize.
        """
        architecture = [64, 10, 10]
        return self._cls_problem(
            name='digits_s',
            loader_func=load_digits,
            arch=architecture,
        )



[docs]
    def P4(self) -> MTOP:
        """
        Problem 4: **Covertype** classification (medium net).

        Network [54, 20, 7] (1247 decision variables), evaluated on a
        5000-sample subsample of the dataset, 7 classes.
        Objective: test error rate, minimize.
        """
        architecture = [54, 20, 7]
        return self._cls_problem(
            name='covtype_m',
            loader_func=fetch_covtype,
            arch=architecture,
            max_samples=5000,
        )



[docs]
    def P5(self) -> MTOP:
        """
        Problem 5: **Digits** classification (large net).

        Network [64, 20, 10] (1510 decision variables), 1797 samples,
        10 classes.
        Objective: test error rate, minimize.
        """
        architecture = [64, 20, 10]
        return self._cls_problem(
            name='digits_l',
            loader_func=load_digits,
            arch=architecture,
        )



[docs]
    def P6(self) -> MTOP:
        """
        Problem 6: **Covertype** classification (large net).

        Architecture: [54, 30, 7], 1867-D (54*30 + 30 + 30*7 + 7).
        5000 samples (subsampled), 7 classes.
        Objective: test error rate, minimize.
        """
        return self._cls_problem(
            'covtype_l', fetch_covtype,
            [54, 30, 7], max_samples=5000
        )