# Source code for gama.postprocessing.best_fit

from typing import List, Optional, Sequence, Tuple

import pandas as pd
from sklearn.base import TransformerMixin

from gama.genetic_programming.components import Individual
from gama.postprocessing.base_post_processing import BasePostProcessing
from gama.utilities.export import (
    imports_and_steps_for_individual,
    transformers_to_str,
    format_import,
    format_pipeline,
)


class BestFitPostProcessing(BasePostProcessing):
    """Post processing technique which trains the best found single pipeline."""

    def __init__(self, time_fraction: float = 0.1):
        """Create the post-processing step.

        Parameters
        ----------
        time_fraction: float (default=0.1)
            Forwarded unchanged to ``BasePostProcessing.__init__``.
        """
        super().__init__(time_fraction)
        # Populated by `post_process`; `to_code` refuses to run while it is None.
        self._selected_individual: Optional[Individual] = None

    def post_process(
        self, x: pd.DataFrame, y: pd.Series, timeout: float, selection: List[Individual]
    ) -> object:
        """Fit the pipeline of the first individual in `selection` on (x, y).

        Parameters
        ----------
        x: pd.DataFrame
            Data passed as first argument to the pipeline's ``fit``.
        y: pd.Series
            Data passed as second argument to the pipeline's ``fit``.
        timeout: float
            Accepted for interface compatibility; not used by this method.
        selection: List[Individual]
            Candidate individuals. Only ``selection[0]`` is used.
            NOTE(review): presumably the caller orders this best-first - confirm.

        Returns
        -------
        object
            Whatever ``pipeline.fit(x, y)`` of the selected individual returns.

        Raises
        ------
        IndexError
            If `selection` is empty.
        """
        self._selected_individual = selection[0]
        return self._selected_individual.pipeline.fit(x, y)

    def to_code(
        self, preprocessing: Optional[Sequence[Tuple[str, TransformerMixin]]] = None
    ) -> str:
        """Generate a script that reconstructs the selected pipeline.

        Parameters
        ----------
        preprocessing: Sequence[Tuple[str, TransformerMixin]], optional (default=None)
            (name, transformer) pairs. When given, they are placed in front of
            the steps of the selected individual, and their imports are added.

        Returns
        -------
        str
            The import lines (sorted, one per line), an empty line, and then
            the pipeline statement.

        Raises
        ------
        RuntimeError
            If called before `post_process` has selected an individual.
        """
        if self._selected_individual is None:
            raise RuntimeError("`to_code` can only be called after `post_process`.")

        imports, steps = imports_and_steps_for_individual(self._selected_individual)
        if preprocessing is not None:
            trans_strs = transformers_to_str([t for _, t in preprocessing])
            names = [name for name, _ in preprocessing]
            # Preprocessing steps come before the steps of the individual.
            steps = list(zip(names, trans_strs)) + steps
            imports = imports.union({format_import(t) for _, t in preprocessing})

        pipeline_statement = format_pipeline(steps)
        # The imports are an unordered collection of strings; sort them so the
        # generated script is identical from run to run.
        script = "\n".join(sorted(imports)) + "\n\n" + pipeline_statement
        return script