from functools import partial
import logging
import math
from typing import List, Optional, Dict, Tuple, Any
import pandas as pd
import stopit
from gama.genetic_programming.operator_set import OperatorSet
from gama.logging.evaluation_logger import EvaluationLogger
from gama.search_methods.base_search import BaseSearch
from gama.utilities.generic.async_evaluator import AsyncEvaluator
from gama.genetic_programming.components.individual import Individual
log = logging.getLogger(__name__)
class AsynchronousSuccessiveHalving(BaseSearch):
    """ Asynchronous Halving Algorithm by Li et al.
    paper: https://arxiv.org/abs/1810.05934

    Parameters
    ----------
    reduction_factor: int, optional (default=3)
        Reduction factor of candidates between each rung.
    minimum_resource: int, optional (default=100)
        Number of samples to use in the lowest rung.
    maximum_resource: int, optional (default=number of samples in the dataset)
        Number of samples to use in the top rung.
        This should not exceed the number of samples in the data.
    minimum_early_stopping_rate: int (default=1)
        Number of lowest rungs to skip.
    """

    def __init__(
        self,
        reduction_factor: Optional[int] = None,
        minimum_resource: Optional[int] = None,
        maximum_resource: Optional[int] = None,
        minimum_early_stopping_rate: Optional[int] = None,
    ):
        super().__init__()
        # maps hyperparameter -> (set value, default).
        # A `None` first element means "not set by the user"; the base class
        # then falls back to the default (second element).
        self._hyperparameters: Dict[str, Tuple[Any, Any]] = dict(
            reduction_factor=(reduction_factor, 3),
            minimum_resource=(minimum_resource, 100),
            maximum_resource=(maximum_resource, 100_000),
            minimum_early_stopping_rate=(minimum_early_stopping_rate, 1),
        )
        # Individuals of the highest reached rung; filled in by `search`.
        self.output: List[Individual] = []
        # Evaluation logger factory that records an extra "rung" field,
        # read back from the individual's metadata set by `evaluate_on_rung`.
        self.logger = partial(
            EvaluationLogger,
            extra_fields=dict(rung=lambda e: e.individual.meta.get("rung", "unknown")),
        )

    def dynamic_defaults(
        self, x: pd.DataFrame, y: pd.DataFrame, time_limit: float
    ) -> None:
        """Set data-dependent hyperparameter defaults before search starts."""
        # `maximum_resource` is the number of samples used in the highest rung.
        # this typically should be the number of samples in the (training) dataset.
        self._overwrite_hyperparameter_default("maximum_resource", len(y))

    def search(
        self, operations: OperatorSet, start_candidates: List[Individual]
    ) -> None:
        """Run ASHA and store the highest-rung individuals in `self.output`."""
        self.output = asha(
            operations, start_candidates=start_candidates, **self.hyperparameters
        )
def asha(
    operations: OperatorSet,
    start_candidates: List[Individual],
    reduction_factor: int = 3,
    minimum_resource: int = 100,
    maximum_resource: int = 100_000,
    minimum_early_stopping_rate: int = 1,
    max_full_evaluations: Optional[int] = None,
) -> List[Individual]:
    """ Asynchronous Halving Algorithm by Li et al.
    paper: https://arxiv.org/abs/1810.05934

    Parameters
    ----------
    operations: OperatorSet
        An operator set with `evaluate` and `individual` functions.
    start_candidates: List[Individual]
        A list which contains the set of best found individuals during search.
    reduction_factor: int (default=3)
        Reduction factor of candidates between each rung.
    minimum_resource: int (default=100)
        Number of samples to use in the lowest rung.
    maximum_resource: int (default=100_000)
        Number of samples to use in the top rung.
        This should not exceed the number of samples in the data.
    minimum_early_stopping_rate: int (default=1)
        Number of lowest rungs to skip.
    max_full_evaluations: Optional[int] (default=None)
        Maximum number of individuals to evaluate on the max rung (i.e. on all data).
        If None, the algorithm will be run indefinitely.

    Returns
    -------
    List[Individual]
        Individuals of the highest rung in which
        at least one individual has been evaluated.
    """
    # Note that here we index the rungs by all possible rungs (0..ceil(log_eta(R/r))),
    # and ignore the first minimum_early_stopping_rate rungs.
    # This contrasts the paper where rung 0 refers to the first used one.
    max_rung = math.ceil(
        math.log(maximum_resource / minimum_resource, reduction_factor)
    )
    rungs = range(minimum_early_stopping_rate, max_rung + 1)
    # Sample budget per rung: grows geometrically by `reduction_factor`,
    # capped at `maximum_resource` on the top rung.
    rung_resources = {
        rung: min(minimum_resource * (reduction_factor ** rung), maximum_resource)
        for rung in rungs
    }
    # Wrap `operations.evaluate` so each result is tagged with its rung and
    # partial (non-top-rung) evaluations are marked (see `evaluate_on_rung`).
    evaluate = partial(
        evaluate_on_rung, evaluate_individual=operations.evaluate, max_rung=max_rung
    )
    # Highest rungs first is how we typically iterate them
    # Should we just use lists of lists/heaps instead?
    # rung -> [(score, individual), ...] of evaluations completed on that rung.
    rung_individuals: Dict[int, List[Tuple[float, Individual]]] = {
        rung: [] for rung in reversed(rungs)
    }
    # rung -> (score, individual) tuples already promoted out of that rung,
    # kept so the same candidate is not promoted twice.
    promoted_individuals: Dict[int, List[Individual]] = {
        rung: [] for rung in reversed(rungs)
    }

    def get_job():
        # Pick the next (individual, rung) to evaluate: promote from the
        # highest rung that has promotable candidates, otherwise start a
        # fresh candidate on the lowest used rung.
        # The dict is ordered highest-rung first; [1:] skips the top rung,
        # from which nothing can be promoted further.
        for rung, individuals in list(rung_individuals.items())[1:]:
            # This is not in the paper code but is derived from fig 2b
            n_to_promote = math.floor(len(individuals) / reduction_factor)
            if n_to_promote - len(promoted_individuals[rung]) > 0:
                # Problem: equal loss falls back on comparison of individual
                not_promoted = set(individuals) - set(promoted_individuals[rung])
                if len(not_promoted) > 0:
                    # NOTE(review): `max` on score[0] assumes higher is better,
                    # but the value is named `loss` below — confirm orientation.
                    to_promote = max(not_promoted, key=lambda i: i[0])
                    promoted_individuals[rung].append(to_promote)
                    return to_promote[1], rung + 1
        # No promotion possible: draw from start candidates first, then
        # generate new random individuals.
        if start_candidates is not None and len(start_candidates) > 0:
            return start_candidates.pop(), minimum_early_stopping_rate
        else:
            return operations.individual(), minimum_early_stopping_rate

    try:
        with AsyncEvaluator() as async_:
            log.info("ASHA start")

            def start_new_job():
                individual, rung = get_job()
                # Scale the per-evaluation timeout with this rung's share of
                # the maximum sample budget: 10s base plus up to 600s.
                time_penalty = rung_resources[rung] / max(rung_resources.values())
                async_.submit(
                    evaluate,
                    individual,
                    rung,
                    subsample=rung_resources[rung],
                    timeout=(10 + (time_penalty * 600)),
                )

            # Prime the evaluator with a fixed number of parallel jobs.
            for _ in range(8):
                start_new_job()
            # Submit one new job for every completed one, until the optional
            # budget of top-rung ("full") evaluations is exhausted.
            while (max_full_evaluations is None) or (
                len(rung_individuals[max_rung]) < max_full_evaluations
            ):
                future = operations.wait_next(async_)
                # Evaluations without a result (e.g. errored) are dropped.
                if future.result is not None:
                    rung = future.result.individual.meta["rung"]
                    loss = future.result.score[0]
                    individual = future.result.individual
                    rung_individuals[rung].append((loss, individual))
                start_new_job()
            highest_rung_reached = max(rungs)
    except stopit.TimeoutException:
        # Overall search time limit hit: fall back to the highest rung that
        # actually received at least one evaluation.
        log.info("ASHA ended due to timeout.")
        reached_rungs = (rung for rung, inds in rung_individuals.items() if inds != [])
        highest_rung_reached = max(reached_rungs)
        if highest_rung_reached != max(rungs):
            # NOTE(review): this raise is suppressed by the `return` inside the
            # `finally` block below (PEP 341 semantics) — confirm intended.
            raise RuntimeWarning("Highest rung not reached.")
    finally:
        # Log how many evaluations each rung received.
        for rung, individuals in rung_individuals.items():
            log.info(f"[{len(individuals)}] {rung}")
        # NOTE(review): returning from `finally` swallows any in-flight
        # exception, including the RuntimeWarning raised above.
        return list(map(lambda p: p[1], rung_individuals[highest_rung_reached]))
def evaluate_on_rung(individual, rung, max_rung, evaluate_individual, *args, **kwargs):
    """Evaluate `individual` and tag the resulting evaluation with its rung.

    The rung number is stored in `evaluation.individual.meta["rung"]` so it
    can be recovered later (e.g. by the evaluation logger).
    """
    result = evaluate_individual(individual, *args, **kwargs)
    result.individual.meta["rung"] = rung
    # We want to avoid saving evaluations that are not on the max rung to disk,
    # because we only want to use pipelines evaluated on the max rung after search.
    # We're working on a better way to relay this information, this is temporary.
    is_partial_evaluation = rung != max_rung
    if is_partial_evaluation and result.error is None:
        result.error = "Not a full evaluation."
    return result