# Source code for tfrddlsim.simulation.transition_simulator

# This file is part of tf-rddlsim.

# tf-rddlsim is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# tf-rddlsim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with tf-rddlsim. If not, see <http://www.gnu.org/licenses/>.

from typing import Optional, Sequence, Tuple
import tensorflow as tf

from rddl2tf.compilers import Compiler
from rddl2tf.core.fluent import TensorFluent


# Type aliases used in the annotations of this module.

# A tensor shape given as a sequence of integer dimensions.
Shape = Sequence[int]
# A pair of a string and its TensorFluent (presumably the fluent name — confirm
# against rddl2tf).
FluentPair = Tuple[str, TensorFluent]

# Factored representations: each alias below is a sequence of tensors.
# They are structurally identical and differ only in the role they document.
ActionTensor = Sequence[tf.Tensor]
StateTensor = Sequence[tf.Tensor]
StatesTensor = Sequence[tf.Tensor]
ActionsTensor = Sequence[tf.Tensor]
IntermsTensor = Sequence[tf.Tensor]


# Output of a simulation cell step: (states, actions, interms, tf.Tensor).
# ActionSimulationCell.__call__ fills the last slot with the reward tensor.
CellOutput = Tuple[StatesTensor, ActionsTensor, IntermsTensor, tf.Tensor]
# Recurrent state carried by the cell from one step to the next.
CellState = Sequence[tf.Tensor]


class ActionSimulationCell(tf.nn.rnn_cell.RNNCell):
    '''ActionSimulationCell implements an MDP transition cell.

    It extends a RNNCell in order to simulate the next state,
    given the current state and action. The cell input is the
    action fluents. The cell output is the tuple
    (next_state, action, interm_state, reward) and the cell
    state is the next state fluents.

    Note:
        All fluents are represented in factored form as Sequence[tf.Tensors].

    Args:
        compiler (:obj:`rddl2tf.compiler.Compiler`): RDDL2TensorFlow compiler.
        batch_size (int): The simulation batch size. It is stored on the
            instance; none of the methods defined in this class read it.
    '''

    def __init__(self, compiler: Compiler, batch_size: int = 1) -> None:
        # Initialise the RNNCell/Layer base class so that attributes it
        # manages are set up before this cell is used.
        super().__init__()
        self._compiler = compiler
        self._batch_size = batch_size

    @property
    def state_size(self) -> Sequence[Shape]:
        '''Returns the MDP state size.'''
        return self._compiler.rddl.state_size

    @property
    def action_size(self) -> Sequence[Shape]:
        '''Returns the MDP action size.'''
        return self._compiler.rddl.action_size

    @property
    def interm_size(self) -> Sequence[Shape]:
        '''Returns the MDP intermediate state size.'''
        return self._compiler.rddl.interm_size

    @property
    def output_size(self) -> Tuple[Sequence[Shape], Sequence[Shape], Sequence[Shape], int]:
        '''Returns the simulation cell output size.

        The sizes are ordered like the cell output:
        (state_size, action_size, interm_size, 1), where the trailing 1
        is the size of the reward output.
        '''
        return (self.state_size, self.action_size, self.interm_size, 1)

    def __call__(self,
            inputs: ActionTensor,
            state: StateTensor,
            scope: Optional[str] = None) -> Tuple[CellOutput, CellState]:
        '''Returns the cell output and next state for the given `inputs` and `state`.

        The cell output is the tuple (next_state, action, interm_state, reward).
        The next state and intermediate state are tuples of tf.float32 tensors,
        the action is `inputs` passed through unchanged, and a rank-1 reward
        is expanded with a trailing unit dimension.

        Note:
            All tensors have shape: (batch_size, fluent_shape).

        Args:
            inputs (Sequence[tf.Tensor]): The current action.
            state (Sequence[tf.Tensor]): The current state.
            scope (Optional[str]): Operations' scope in computation graph.
                Not used by this implementation.

        Returns:
            Tuple[CellOutput, CellState]: (output, next_state).
        '''
        # action
        action = inputs

        # next state
        transition_scope = self._compiler._scope.transition(
            self._compiler.non_fluents, state, action)
        interm_fluents, next_state_fluents = self._compiler._compile_cpfs(transition_scope)

        # reward: the next state fluents are added to the scope before the
        # reward is compiled, so they are available to the reward expression.
        transition_scope.update(dict(next_state_fluents))
        reward = self._compiler._compile_reward(transition_scope)
        reward = self._output_size(reward.tensor)

        # outputs
        interm_state = self._output(interm_fluents)
        next_state = self._output(next_state_fluents)
        output = (next_state, action, interm_state, reward)

        return (output, next_state)

    @classmethod
    def _output_size(cls, tensor: tf.Tensor) -> tf.Tensor:
        '''Returns `tensor` with a trailing unit dimension if it is rank-1.

        Tensors of any other rank are returned unchanged.
        '''
        if tensor.shape.ndims == 1:
            tensor = tf.expand_dims(tensor, -1)
        return tensor

    @classmethod
    def _output(cls, fluents: Sequence[FluentPair]) -> Sequence[tf.Tensor]:
        '''Converts `fluents` to tensors with datatype tf.float32.

        Tensors that are already tf.float32 are passed through as-is.
        The result is a tuple in the same order as `fluents`.
        '''
        tensors = (fluent.tensor for _, fluent in fluents)
        return tuple(
            tensor if tensor.dtype == tf.float32 else tf.cast(tensor, tf.float32)
            for tensor in tensors)
Source code for tfrddlsim.simulation.transition_simulator

tfrddlsim

Navigation

Related Topics