# Source code for xplainable.core.optimisation.genetic

""" Copyright Xplainable Pty Ltd, 2023"""

import numpy as np
import pandas as pd
from typing import Union
from ..models import XRegressor

class XEvolutionaryNetwork:
    """A layer-based optimisation framework for XRegressor models.

    XEvolutionaryNetwork is a novel optimisation framework for XRegressor
    models that allows for flexibility and depth. It is inspired by deep
    learning frameworks, but is applied over additive models for weight
    optimisation.

    It works by taking a pre-trained XRegressor model and fitting it, along
    with the training data, to an evolutionary network. The evolutionary
    network consists of a series of layers, each of which is responsible for
    optimising the model weights given a set of constraints.

    What are layers?:
        There are currently two types of layers: Tighten() and Evolve().
        More information on each layer can be found in their respective
        documentation.

    There is no limit to the number of layers that can be added to the
    network, and each layer can be customised for specific objectives. Like
    other machine learning methods, the network can be prone to over-fitting,
    so it is recommended to use a validation set to monitor performance.

    An XEvolutionaryNetwork can be stopped mid-training and resumed at any
    time. This is useful for long-running optimisations and iterative work.
    You can track the remaining and completed layers using the
    `future_layers` and `completed_layers` attributes.

    Args:
        model (XRegressor): The model to optimise.
        apply_range (bool): Whether to apply the model's prediction range to
            the output.
    """

    def __init__(self, model: 'XRegressor', apply_range: bool = False):
        self.model = model
        self.apply_range = apply_range

        # State populated by fit().
        self.root_chromosome = np.array([])  # one score per leaf column
        self.x = None  # per-row leaf scores, NaN where the leaf is inactive
        self.y = None  # target values as a numpy array
        self._mask = None  # boolean leaf-membership mask (numpy)
        self._mask_df = None  # same mask, kept as a DataFrame
        self.static_scores = None  # summed scores of non-subset features
        self.leaves = []  # leaf column labels, formatted "<feature>_<leaf>"

        # State maintained by optimise().
        self.future_layers = []
        self.completed_layers = []
        self.layer_id = 0
        self.layer_name = None  # class name of the layer currently running
        # Not read or written by the methods of this class.
        # NOTE(review): presumably maintained by the layers - confirm.
        self.checkpoint_score = None

    def add_layer(self, layer, idx: int = None):
        """Adds a layer to the network.

        Args:
            layer (Tighten | Evolve): The layer to add.
            idx (int, optional): The index to add the layer at. Defaults to
                the end of the pending layers.
        """
        if idx is None:
            idx = len(self.future_layers)

        self.future_layers.insert(idx, layer)

    def drop_layer(self, idx: int):
        """Removes a layer from the network.

        Args:
            idx (int): The index of the layer to remove.

        Raises:
            IndexError: If there is no pending layer at ``idx``.
        """
        self.future_layers.pop(idx)

    def clear_layers(self):
        """Removes all pending layers from the network."""
        self.future_layers = []

    def fit(
            self,
            x: Union[pd.DataFrame, np.ndarray],
            y: Union[pd.Series, np.ndarray],
            subset: list = None
    ) -> 'XEvolutionaryNetwork':
        """Fits the model and data to the evolutionary network.

        Every call rebuilds the fitted state from scratch, so the network
        can safely be re-fitted on new data.

        Args:
            x (pd.DataFrame | np.ndarray): The data to fit.
                NOTE(review): this method relies on ``x.columns`` (and
                ``x.drop`` when the model has id columns), so a DataFrame
                is effectively required - confirm ndarray support.
            y (pd.Series | np.ndarray): The target to fit.
            subset (list, optional): A list of columns to subset for
                feature level optimisation.

        Returns:
            XEvolutionaryNetwork: The fitted network.

        Raises:
            KeyError: If a column named in ``subset`` is not in ``x``.
        """
        subset = [] if subset is None else subset
        use_subset = len(subset) > 0

        x = x.copy()

        # Remove id columns if they exist
        if len(self.model.id_columns) > 0:
            x = x.drop(
                columns=[c for c in self.model.id_columns if c in x.columns])

        # Drop scores left over from a previous subset fit.
        self.static_scores = None

        # Handlers for subsetting
        if use_subset:
            subset_locs = x.columns.get_indexer(subset)

            # get_indexer marks unknown labels with -1, which np.delete
            # would silently read as "the last column".
            missing = [
                col for col, loc in zip(subset, subset_locs) if loc == -1]
            if missing:
                raise KeyError(
                    f"subset columns not found in x: {missing}")

            x_trans = self.model._transform(x)
            # Features outside the subset contribute a fixed per-row score.
            self.static_scores = np.delete(
                x_trans, subset_locs, axis=1).sum(axis=1)
            subset_locs = [str(loc) for loc in subset_locs]

        # Leaf ids are labelled by feature position ("0", "1", ...).
        mask_columns = [str(pos) for pos in range(len(x.columns))]
        id_df = pd.DataFrame(
            self.model._get_leaf_ids(x).astype(int),
            columns=mask_columns
        )

        # Filter a subset for feature level optimisation
        if use_subset:
            id_df = id_df[subset_locs]
            dummy_columns = subset_locs
        else:
            dummy_columns = mask_columns

        # create leaf-value mask: one boolean column per (feature, leaf)
        _mask = pd.get_dummies(
            id_df, columns=dummy_columns, prefix_sep="_") != 0

        self._mask_df = _mask
        self._mask = _mask.to_numpy()

        # Copy mask for output
        _df = _mask.copy()

        # Get list of leaf nodes
        self.leaves = np.array(_df.columns)

        # create values column for each leaf: the leaf's score where the
        # row falls in the leaf, NaN elsewhere.
        scores = []
        for leaf in _df.columns:
            f, _id = leaf.split("_")
            score = self.model._profile[int(f)][int(_id)][-4]
            _df[leaf] = _df[leaf].map({True: score})
            scores.append(score)

        # Assign rather than append so a re-fit cannot grow the chromosome.
        self.root_chromosome = np.array(scores, dtype=float)

        self.x = _df.to_numpy()
        # Accept plain arrays as well as pandas objects for the target.
        self.y = y.to_numpy() if hasattr(y, "to_numpy") else np.asarray(y)

        return self

    def optimise(self, callback=None) -> 'XEvolutionaryNetwork':
        """Sequentially runs the layers in the network.

        Each pending layer's ``transform(network, x, y, callback)`` is
        called in order and must return the updated ``(x, root_chromosome)``
        pair. A finished layer moves from ``future_layers`` to
        ``completed_layers``.

        Args:
            callback (any, optional): Callback for progress tracking.

        Returns:
            XEvolutionaryNetwork: The evolutionary network.

        Raises:
            ValueError: If no layers have been added.
        """
        if len(self.future_layers) == 0:
            raise ValueError('Must include at least one optimisation layer')

        # Iterate over a snapshot because future_layers shrinks as we go.
        for layer in list(self.future_layers):
            self.layer_name = type(layer).__name__
            self.x, self.root_chromosome = layer.transform(
                self, self.x, self.y, callback)

            self.completed_layers.append(layer)
            self.future_layers.remove(layer)
            self.layer_id += 1

        # Set optimised flag
        self.model.metadata["optimised"] = True

        return self