Source code for bandit.environment

"""
Single-state environments that contain a list of rewards for actions.
"""

from typing import List, Union

import numpy as np

from bandit.reward import GaussianReward, PoissonReward


class Environment:
    """
    A single-state environment that contains a list of rewards for actions.

    Each action is identified by its index into ``rewards``; taking an
    action delegates to the reward object stored at that index.

    Args:
        rewards (List): reward objects, one per action; each must be a
            ``GaussianReward`` or ``PoissonReward`` instance

    Raises:
        AssertionError: if any element of ``rewards`` is not a supported
            reward type
    """

    def __init__(self, rewards: List):
        for r in rewards:
            # Raised explicitly (rather than via ``assert``) so the check
            # still runs under ``python -O``; the exception type is kept as
            # AssertionError for callers that already rely on it.
            if not isinstance(r, (GaussianReward, PoissonReward)):
                raise AssertionError("invalid reward")
        self.rewards = rewards

    def __len__(self) -> int:
        """Return the number of available actions."""
        return len(self.rewards)

    def action(self, i: int) -> Union[float, int]:
        """
        Given a choice of action, produce a reward for that action.

        Args:
            i (int): action to be taken; must satisfy ``0 <= i < len(self)``

        Returns:
            (float or int) reward from that action

        Raises:
            AssertionError: if ``i`` is negative or not less than the number
                of actions
        """
        # Negative indices are rejected on purpose: plain list indexing
        # would otherwise wrap around and silently pick a different action.
        if not i > -1:
            raise AssertionError("action index must be non-negative")
        if not i < len(self):
            raise AssertionError("action index out of range")
        return self.rewards[i].get_reward()

    def expected_rewards(self) -> np.ndarray:
        """
        Produce the expected rewards for all possible actions.

        Returns:
            (np.ndarray) expected rewards (true values), one entry per
            action, in the same order as ``self.rewards``
        """
        return np.array([r.expected_reward() for r in self.rewards])

    def moments(self, kind: str = "mv") -> np.ndarray:
        """
        Statistical moments of all actions.

        Args:
            kind (str): which moments to compute; default is "mv". The
                value is forwarded unchanged to each reward's ``moments``
                method.

        Returns:
            (np.ndarray) statistical moments of actions, one row per action,
            in the same order as ``self.rewards``
        """
        return np.array([r.moments(kind) for r in self.rewards])