# Source code for gama.postprocessing.base_post_processing

from abc import ABC
from typing import List, Union, Dict, Any, Tuple, TYPE_CHECKING, Sequence

import pandas as pd
from sklearn.base import TransformerMixin

from gama.genetic_programming.components import Individual


if TYPE_CHECKING:
    from gama.gama import Gama


class BasePostProcessing(ABC):
    """Base class from which all post-processing methods should be derived.

    This class should not be directly used to configure GAMA.
    """

    def __init__(self, time_fraction: float):
        """
        Parameters
        ----------
        time_fraction: float
            Fraction of total time to be reserved for this post-processing step.
        """
        self.time_fraction: float = time_fraction
        # Maps hyperparameter name -> (value set by user, default value).
        # A user value of None is treated as "not set" by `__str__` and
        # `hyperparameters`, in which case the default applies.
        self._hyperparameters: Dict[str, Tuple[Any, Any]] = {}

    def __str__(self):
        """Return `ClassName(name=value,...)` listing only user-set hyperparameters."""
        # Not sure if I should report actual used hyperparameters
        # (i.e. include default), or only those set by user.
        user_set_hps = {
            parameter: set_value
            for parameter, (set_value, _default) in self._hyperparameters.items()
            if set_value is not None
        }
        hp_configuration = ",".join(
            f"{name}={value}" for name, value in user_set_hps.items()
        )
        return f"{self.__class__.__name__}({hp_configuration})"

    @property
    def hyperparameters(self) -> Dict[str, Any]:
        """Hyperparameter (name, value) pairs.

        Value determined by user > dynamic default > static default.
        Dynamic default values only considered if `dynamic_defaults` has been called.
        """
        return {
            parameter: set_value if set_value is not None else default
            for parameter, (set_value, default) in self._hyperparameters.items()
        }

    def _overwrite_hyperparameter_default(self, hyperparameter: str, value: Any):
        """Replace the default of `hyperparameter`, keeping any user-set value.

        Raises
        ------
        KeyError
            If `hyperparameter` is not a registered hyperparameter.
        """
        set_value, _ = self._hyperparameters[hyperparameter]
        self._hyperparameters[hyperparameter] = (set_value, value)

    def dynamic_defaults(self, gama: "Gama"):
        """Hook that receives the `Gama` instance; a no-op in the base class.

        NOTE(review): presumably overridden by subclasses to install defaults
        through `_overwrite_hyperparameter_default` -- confirm against subclasses.
        """
        pass

    def post_process(
        self,
        x: pd.DataFrame,
        y: Union[pd.DataFrame, pd.Series],
        timeout: float,
        selection: List[Individual],
    ) -> object:
        """
        Parameters
        ----------
        x: pd.DataFrame
            all training features
        y: Union[pd.DataFrame, pd.Series]
            all training labels
        timeout: float
            allowed time in seconds for post-processing
        selection: List[Individual]
            individuals selected by the search space, ordered best first

        Returns
        -------
        Any
            A model with `predict` and optionally `predict_proba`.

        Raises
        ------
        NotImplementedError
            Always, in this base class; child classes must override the method.
        """
        raise NotImplementedError("Method must be implemented by child class.")

    def to_code(
        self,
        preprocessing: Union[Sequence[Tuple[str, TransformerMixin]], None] = None,
    ) -> str:
        """Generate Python code to reconstruct a pipeline that constructs the model.

        Parameters
        ----------
        preprocessing: Sequence[Tuple[str, TransformerMixin]], optional (default=None)
            Preprocessing steps that need be executed before the model.

        Returns
        -------
        str
            A string of Python code that sets a 'pipeline' variable to the pipeline
            that defines the final pipeline generated by post-processing.

        Raises
        ------
        NotImplementedError
            Always, in this base class; child classes may optionally override.
        """
        raise NotImplementedError("Method is optionally implemented by child class.")