Source code for tfrddlsim.viz.generic_visualizer

# This file is part of tf-rddlsim.

# tf-rddlsim is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# tf-rddlsim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with tf-rddlsim. If not, see <http://www.gnu.org/licenses/>.

from typing import List, Sequence, Optional, Tuple, Union

import numpy as np

from rddl2tf.compilers import Compiler
from tfrddlsim.viz.abstract_visualizer import Visualizer

Value = Union[bool, int, float, np.array]
NonFluents = Sequence[Tuple[str, Value]]
Fluents = Sequence[Tuple[str, np.array]]


class GenericVisualizer(Visualizer):
    '''GenericVisualizer is a generic text-based trajectory visualizer.

    Args:
        compiler (:obj:`rddl2tf.compilers.Compiler`): RDDL2TensorFlow compiler.
        verbose (bool): Verbosity flag.
    '''

    def __init__(self, compiler: Compiler, verbose: bool) -> None:
        super().__init__(compiler, verbose)

    def render(self,
               trajectories: Tuple[NonFluents, Fluents, Fluents, Fluents, Fluents, np.array],
               batch: Optional[int] = None) -> None:
        '''Prints the simulated `trajectories`.

        Args:
            trajectories: Non-fluents, initial state, states, actions, interms and rewards.
            batch: Number of batches to render.
        '''
        self._render_trajectories(trajectories)

    def _render_trajectories(self,
                             trajectories: Tuple[NonFluents, Fluents, Fluents, Fluents, Fluents, np.array]) -> None:
        '''Prints the first batch of simulated `trajectories`.

        Args:
            trajectories: Non-fluents, initial state, states, actions, interms and rewards.
        '''
        if self._verbose:
            non_fluents, initial_state, states, actions, interms, rewards = trajectories
            shape = states[0][1].shape
            batch_size, horizon = shape[0], shape[1]
            states = [(s[0], s[1][0]) for s in states]
            interms = [(f[0], f[1][0]) for f in interms]
            actions = [(a[0], a[1][0]) for a in actions]
            rewards = np.reshape(rewards, [batch_size, horizon])[0]
            self._render_batch(non_fluents, states, actions, interms, rewards)

    def _render_batch(self,
                      non_fluents: NonFluents,
                      states: Fluents,
                      actions: Fluents,
                      interms: Fluents,
                      rewards: np.array,
                      horizon: Optional[int] = None) -> None:
        '''Prints `non_fluents`, `states`, `actions`, `interms` and `rewards`
        for the given `horizon`.

        Args:
            non_fluents (Sequence[Tuple[str, Value]]): Non-fluent values.
            states (Sequence[Tuple[str, np.array]]): A state trajectory.
            actions (Sequence[Tuple[str, np.array]]): An action trajectory.
            interms (Sequence[Tuple[str, np.array]]): An interm state trajectory.
            rewards (np.array): Sequence of rewards (1-dimensional array).
            horizon (Optional[int]): Number of timesteps.
        '''
        if horizon is None:
            horizon = len(states[0][1])

        self._render_round_init(horizon, non_fluents)

        for t in range(horizon):
            s = [(s[0], s[1][t]) for s in states]
            f = [(f[0], f[1][t]) for f in interms]
            a = [(a[0], a[1][t]) for a in actions]
            r = rewards[t]
            self._render_timestep(t, s, a, f, r)

        self._render_round_end(rewards)

    def _render_timestep(self,
                         t: int,
                         s: Fluents,
                         a: Fluents,
                         f: Fluents,
                         r: np.float32) -> None:
        '''Prints fluents and rewards for the given timestep `t`.

        Args:
            t (int): Timestep.
            s (Sequence[Tuple[str, np.array]]): State fluents.
            a (Sequence[Tuple[str, np.array]]): Action fluents.
            f (Sequence[Tuple[str, np.array]]): Interm state fluents.
            r (np.float32): Reward.
        '''
        print("============================")
        print("TIME = {}".format(t))
        print("============================")
        fluent_variables = self._compiler.rddl.action_fluent_variables
        self._render_fluent_timestep('action', a, fluent_variables)
        fluent_variables = self._compiler.rddl.interm_fluent_variables
        self._render_fluent_timestep('interms', f, fluent_variables)
        fluent_variables = self._compiler.rddl.state_fluent_variables
        self._render_fluent_timestep('states', s, fluent_variables)
        self._render_reward(r)

    def _render_fluent_timestep(self,
                                fluent_type: str,
                                fluents: Sequence[Tuple[str, np.array]],
                                fluent_variables: Sequence[Tuple[str, List[str]]]) -> None:
        '''Prints `fluents` of the given `fluent_type` as a list of instantiated
        variables with corresponding values.

        Args:
            fluent_type (str): Fluent type.
            fluents (Sequence[Tuple[str, np.array]]): List of pairs (fluent_name, fluent_values).
            fluent_variables (Sequence[Tuple[str, List[str]]]): List of pairs (fluent_name, args).
        '''
        for fluent_pair, variable_list in zip(fluents, fluent_variables):
            name, fluent = fluent_pair
            _, variables = variable_list
            print(name)
            fluent = fluent.flatten()
            for variable, value in zip(variables, fluent):
                print('- {}: {} = {}'.format(fluent_type, variable, value))
        print()

    def _render_reward(self, r: np.float32) -> None:
        '''Prints reward `r`.'''
        print("reward = {:.4f}".format(float(r)))
        print()

    def _render_round_init(self, horizon: int, non_fluents: NonFluents) -> None:
        '''Prints round init information about `horizon` and `non_fluents`.'''
        print('*********************************************************')
        print('>>> ROUND INIT, horizon = {}'.format(horizon))
        print('*********************************************************')
        fluent_variables = self._compiler.rddl.non_fluent_variables
        self._render_fluent_timestep('non-fluents', non_fluents, fluent_variables)

    def _render_round_end(self, rewards: np.array) -> None:
        '''Prints round end information about `rewards`.'''
        print("*********************************************************")
        print(">>> ROUND END")
        print("*********************************************************")
        total_reward = np.sum(rewards)
        print("==> Objective value = {}".format(total_reward))
        print("==> rewards = {}".format(list(rewards)))
        print()
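
A minimal, self-contained usage sketch (not part of the module above). It illustrates the data layout that GenericVisualizer.render expects; the SimpleNamespace stubs are hypothetical stand-ins for a real rddl2tf Compiler and expose only the attributes this visualizer reads. In practice the compiler and trajectories come from tf-rddlsim's simulation pipeline rather than being built by hand.

# Hypothetical example: render synthetic trajectories with GenericVisualizer.
import numpy as np
from types import SimpleNamespace

from tfrddlsim.viz.generic_visualizer import GenericVisualizer

batch_size, horizon = 1, 3

# Stub compiler exposing only the fluent variable metadata the visualizer reads:
# lists of pairs (fluent_name, [instantiated variable names]).
rddl = SimpleNamespace(
    non_fluent_variables=[('MAX_LEVEL/1', ['MAX_LEVEL(t1)'])],
    state_fluent_variables=[('level/1', ['level(t1)'])],
    action_fluent_variables=[('outflow/1', ['outflow(t1)'])],
    interm_fluent_variables=[('overflow/1', ['overflow(t1)'])])
compiler = SimpleNamespace(rddl=rddl)

# Trajectories tuple: (non_fluents, initial_state, states, actions, interms, rewards).
# Fluent arrays have shape (batch_size, horizon, fluent_size); rewards (batch_size, horizon).
non_fluents = [('MAX_LEVEL/1', np.array([100.0]))]
initial_state = [('level/1', np.zeros((batch_size, 1)))]
states = [('level/1', np.random.rand(batch_size, horizon, 1))]
actions = [('outflow/1', np.random.rand(batch_size, horizon, 1))]
interms = [('overflow/1', np.zeros((batch_size, horizon, 1)))]
rewards = np.random.rand(batch_size, horizon)

# Only the first batch is printed; verbose=False would suppress all output.
viz = GenericVisualizer(compiler, verbose=True)
viz.render((non_fluents, initial_state, states, actions, interms, rewards))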