Source code for bandit.environment

Single-state environments that contain a list of rewards for actions.

from typing import List, Union

import numpy as np

from bandit.reward import GaussianReward, PoissonReward

class Environment:
    """A single-state environment that contains a list of rewards for actions.

    Each action is identified by its index into ``rewards``; taking an action
    asks the reward object stored at that index for a reward.

    Args:
        rewards (List): reward objects, one per action; every element must be
            a ``GaussianReward`` or ``PoissonReward`` (subclasses included)

    Raises:
        AssertionError: if any element of ``rewards`` is not a supported
            reward type
    """

    def __init__(self, rewards: List):
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type and message are
        # kept so existing callers see the same failure.
        for r in rewards:
            if not isinstance(r, (GaussianReward, PoissonReward)):
                raise AssertionError("invalid reward")
        self.rewards = rewards

    def __len__(self) -> int:
        """Return the number of available actions."""
        return len(self.rewards)

    def action(self, i: int) -> Union[float, int]:
        """
        Given a choice of action, produce a reward for that action.

        Args:
            i (int): action to be taken; must satisfy ``0 <= i < len(self)``

        Returns:
            (float) reward from that action

        Raises:
            AssertionError: if ``i`` is outside the valid action range
        """
        # Explicit bounds check: without it a negative index would silently
        # wrap around to the end of the list (and a bare ``assert`` would
        # vanish under ``python -O``).
        if not 0 <= i < len(self):
            raise AssertionError(
                f"action index {i} out of range for {len(self)} actions"
            )
        return self.rewards[i].get_reward()

    def expected_rewards(self) -> np.ndarray:
        """
        Produce the expected rewards for all possible actions.

        Returns:
            (np.ndarray) expected rewards (true values), one entry per action
        """
        return np.array([r.expected_reward() for r in self.rewards])

    def moments(self, kind: str = "mv") -> np.ndarray:
        """
        Statistical moments of all actions.

        Args:
            kind (str): which moments to compute, passed through to each
                reward's ``moments`` method; default is "mv"

        Returns:
            (np.ndarray) statistical moments of actions, one entry per action
        """
        return np.array([r.moments(kind) for r in self.rewards])