# Source code for solaris.nets.torch_callbacks

"""PyTorch Callbacks."""

import os
import numpy as np
from .metrics import metric_dict
import torch


class TorchEarlyStopping(object):
    """Tracks if model training should stop based on rate of improvement.

    Arguments
    ---------
    patience : int, optional
        The number of epochs to wait before stopping the model if the metric
        didn't improve. Defaults to 5.
    threshold : float, optional
        The minimum metric improvement required to count as "improvement".
        Defaults to ``0.0`` (any improvement satisfies the requirement). As
        written, lower metric values are treated as better, as with a loss.
    verbose : bool, optional
        Verbose text output. Defaults to off (``False``). _NOTE_ : This
        currently does nothing.
    """

    def __init__(self, patience=5, threshold=0.0, verbose=False):
        self.patience = patience
        self.threshold = threshold
        self.counter = 0
        self.best = None
        self.stop = False

    def __call__(self, metric_score):
        if self.best is None:
            self.best = metric_score
            self.counter = 0
        else:
            # Count the epoch against `patience` unless the score improved
            # (dropped) by at least `threshold`.
            if self.best - self.threshold < metric_score:
                self.counter += 1
            else:
                self.best = metric_score
                self.counter = 0

        if self.counter >= self.patience:
            self.stop = True
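
# Usage sketch (illustrative, not part of the module): stop training when a
# validation loss stops improving. `run_validation_epoch` is a hypothetical
# helper standing in for your own validation loop.
#
#     early_stopper = TorchEarlyStopping(patience=5, threshold=0.01)
#     for epoch in range(max_epochs):
#         val_loss = run_validation_epoch(model, val_loader)  # hypothetical
#         early_stopper(val_loss)
#         if early_stopper.stop:
#             break
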
class TorchTerminateOnNaN(object):
    """Sets a stop condition if the model loss reaches a NaN or inf value.

    Arguments
    ---------
    patience : int, optional
        The number of consecutive epochs that must display a NaN loss value
        before stopping. Defaults to ``1``.
    verbose : bool, optional
        Verbose text output. Defaults to off (``False``). _NOTE_ : This
        currently does nothing.
    """

    def __init__(self, patience=1, verbose=False):
        self.patience = patience
        self.counter = 0
        self.stop = False

    def __call__(self, loss):
        if np.isnan(loss) or np.isinf(loss):
            self.counter += 1
            if self.counter >= self.patience:
                self.stop = True
        else:
            # Any finite loss value resets the count.
            self.counter = 0
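
# Usage sketch (illustrative, not part of the module): `train_one_epoch` is
# a hypothetical helper returning the epoch's mean loss as a float.
#
#     nan_terminator = TorchTerminateOnNaN()
#     for epoch in range(max_epochs):
#         train_loss = train_one_epoch(model, train_loader)  # hypothetical
#         nan_terminator(train_loss)
#         if nan_terminator.stop:
#             break
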
class TorchTerminateOnMetricNaN(object):
    """Sets a stop condition if a training metric reaches a NaN or inf value.

    Arguments
    ---------
    stopping_metric : str
        The name of the metric to stop on. The name must match a key in
        :const:`solaris.nets.metrics.metric_dict` .
    patience : int, optional
        The number of consecutive epochs that must display a NaN metric
        value before stopping. Defaults to ``1``.
    verbose : bool, optional
        Verbose text output. Defaults to off (``False``). _NOTE_ : This
        currently does nothing.
    """

    def __init__(self, stopping_metric, patience=1, verbose=False):
        self.metric = metric_dict[stopping_metric]
        self.patience = patience
        self.counter = 0
        self.stop = False

    def __call__(self, y_true, y_pred):
        # Compute the metric once and check it for inf/NaN.
        metric_value = self.metric(y_true, y_pred)
        if np.isinf(metric_value) or np.isnan(metric_value):
            self.counter += 1
            if self.counter >= self.patience:
                self.stop = True
        else:
            self.counter = 0
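
# Usage sketch (illustrative, not part of the module): assumes 'f1_score' is
# a key in solaris.nets.metrics.metric_dict and that a hypothetical
# `validate` helper returns numpy arrays of labels and predictions.
#
#     metric_terminator = TorchTerminateOnMetricNaN('f1_score')
#     for epoch in range(max_epochs):
#         y_true, y_pred = validate(model, val_loader)  # hypothetical
#         metric_terminator(y_true, y_pred)
#         if metric_terminator.stop:
#             break
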
class TorchModelCheckpoint(object):
    """Save the model at specific points using Keras checkpointing args.

    Arguments
    ---------
    filepath : str, optional
        Path to save the model file to. The end of the path (before the
        file extension) will have ``'_[epoch]'`` added to it to ID specific
        checkpoints.
    monitor : str, optional
        The loss value to monitor. Options are
        ``['loss', 'val_loss', 'periodic']`` or a metric from the keys in
        :const:`solaris.nets.metrics.metric_dict` . Defaults to ``'loss'``.
        If ``'periodic'``, it saves every n epochs (see `period` below).
    verbose : bool, optional
        Verbose text output. Defaults to ``False``.
    save_best_only : bool, optional
        Save only the model with the best value? Defaults to no (``False``).
    mode : str, optional
        One of ``['auto', 'min', 'max']``. Is a better value higher or
        lower? Defaults to ``'auto'`` in which case it tries to infer it (if
        ``monitor='loss'`` or ``monitor='val_loss'``, it assumes ``'min'``;
        if it's a metric it assumes ``'max'``.) If ``'min'``, it assumes
        lower values are better; if ``'max'``, it assumes higher values are
        better.
    period : int, optional
        If using ``monitor='periodic'``, this saves models every `period`
        epochs. Otherwise, it sets the minimum number of epochs between
        checkpoints.
    weights_only : bool, optional
        If ``True``, only the model's ``state_dict`` is saved at each
        checkpoint rather than the full model object. Defaults to ``True``.
    """

    def __init__(self, filepath='', monitor='loss', verbose=False,
                 save_best_only=False, mode='auto', period=1,
                 weights_only=True):
        self.filepath = filepath
        self.monitor = monitor
        if self.monitor not in ['loss', 'val_loss', 'periodic']:
            self.monitor = metric_dict[self.monitor]
        self.verbose = verbose
        self.save_best_only = save_best_only
        self.period = period
        self.weights_only = weights_only
        self.mode = mode
        if self.mode == 'auto':
            # Infer the comparison direction: losses are minimized, metrics
            # are maximized.
            if self.monitor in ['loss', 'val_loss']:
                self.mode = 'min'
            else:
                self.mode = 'max'

        self.epoch = 0
        self.last_epoch = 0
        self.last_saved_value = None

    def __call__(self, model, loss_value=None, y_true=None, y_pred=None):
        """Run a round of model checkpointing for an epoch.

        Arguments
        ---------
        model : model object
            The model to be saved during checkpoints. Must be a PyTorch
            model.
        loss_value : numeric, optional
            The numeric output of the loss function. Only required if using
            ``monitor='loss'`` or ``monitor='val_loss'``.
        y_true : :class:`np.array` , optional
            The labels for the validation data. Only required if using a
            metric as the monitored value.
        y_pred : :class:`np.array` , optional
            The predicted values from the model. Only required if using a
            metric as the monitored value.
        """
        self.epoch += 1
        if self.monitor == 'periodic':
            # update based on period
            if self.last_epoch + self.period <= self.epoch:
                # self.last_saved_value = loss_value if loss_value else 0
                self.save(model, self.weights_only)
                self.last_epoch = self.epoch

        elif self.monitor in ['loss', 'val_loss']:
            if self.last_saved_value is None:
                # First checkpointable epoch: save unconditionally.
                self.last_saved_value = loss_value
                if self.last_epoch + self.period <= self.epoch:
                    self.save(model, self.weights_only)
                    self.last_epoch = self.epoch
            if self.last_epoch + self.period <= self.epoch:
                if self.check_is_best_value(loss_value):
                    self.last_saved_value = loss_value
                    self.save(model, self.weights_only)
                    self.last_epoch = self.epoch

        else:
            if self.last_saved_value is None:
                # First checkpointable epoch: save unconditionally.
                self.last_saved_value = self.monitor(y_true, y_pred)
                if self.last_epoch + self.period <= self.epoch:
                    self.save(model, self.weights_only)
                    self.last_epoch = self.epoch
            if self.last_epoch + self.period <= self.epoch:
                metric_value = self.monitor(y_true, y_pred)
                if self.check_is_best_value(metric_value):
                    self.last_saved_value = metric_value
                    self.save(model, self.weights_only)
                    self.last_epoch = self.epoch

    def check_is_best_value(self, value):
        """Check if `value` is better than the best stored value."""
        if self.mode == 'min' and self.last_saved_value > value:
            return True
        elif self.mode == 'max' and self.last_saved_value < value:
            return True
        else:
            return False

    def save(self, model, weights_only):
        """Save the model.

        Arguments
        ---------
        model : :class:`torch.nn.Module`
            A PyTorch model instance to save.
        weights_only : bool
            Should the entire model be saved, or only its weights (also
            known as the state_dict)? As set in ``__init__``, this defaults
            to ``True`` (saves only the state_dict). The entire model must
            be saved to resume training without re-defining the model
            architecture, optimizer, and loss function.
        """
        save_name = os.path.splitext(self.filepath)[0] + '_epoch{}_{}'.format(
            self.epoch, np.round(self.last_saved_value, 3))
        save_name = save_name + os.path.splitext(self.filepath)[1]
        # Unwrap DataParallel so saved weight names don't carry the
        # ``module.`` prefix.
        if isinstance(model, torch.nn.DataParallel):
            to_save = model.module
        else:
            to_save = model
        if weights_only:
            torch.save(to_save.state_dict(), save_name)
        else:
            torch.save(to_save, save_name)
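
# Usage sketch (illustrative, not part of the module): checkpoint on
# validation loss once per epoch. The filepath is hypothetical; save() adds
# '_epoch{n}_{value}' before the extension, e.g. 'model_epoch3_0.127.pth'.
#
#     checkpointer = TorchModelCheckpoint(filepath='model.pth',
#                                         monitor='val_loss')
#     for epoch in range(max_epochs):
#         val_loss = run_validation_epoch(model, val_loader)  # hypothetical
#         checkpointer(model, loss_value=val_loss)
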
torch_callback_dict = {
    "early_stopping": TorchEarlyStopping,
    "model_checkpoint": TorchModelCheckpoint,
    "terminate_on_nan": TorchTerminateOnNaN,
    "terminate_on_metric_nan": TorchTerminateOnMetricNaN
}
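
# Usage sketch (illustrative, not part of the module): the dict maps config
# strings to callback classes, so callbacks can be instantiated from a
# hypothetical name-to-kwargs mapping:
#
#     callback_config = {'early_stopping': {'patience': 5},
#                        'terminate_on_nan': {}}
#     callbacks = [torch_callback_dict[name](**kwargs)
#                  for name, kwargs in callback_config.items()]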