# Source code for tfrddlsim.simulation.transition_simulator

# This file is part of tf-rddlsim.

# tf-rddlsim is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# tf-rddlsim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with tf-rddlsim. If not, see <http://www.gnu.org/licenses/>.

from typing import Optional, Sequence, Tuple
import tensorflow as tf

from rddl2tf.compilers import Compiler
from rddl2tf.core.fluent import TensorFluent


Shape = Sequence[int]
FluentPair = Tuple[str, TensorFluent]

ActionTensor = Sequence[tf.Tensor]
StateTensor = Sequence[tf.Tensor]
StatesTensor = Sequence[tf.Tensor]
ActionsTensor = Sequence[tf.Tensor]
IntermsTensor = Sequence[tf.Tensor]


CellOutput = Tuple[StatesTensor, ActionsTensor, IntermsTensor, tf.Tensor]
CellState = Sequence[tf.Tensor]


class ActionSimulationCell(tf.nn.rnn_cell.RNNCell):
    '''ActionSimulationCell implements an MDP transition cell.

    It extends an RNNCell in order to simulate the next state,
    given the current state and action. The cell input is the
    action fluents and the cell output is the next state fluents.

    Note:
        All fluents are represented in factored form as Sequence[tf.Tensor].

    Args:
        compiler (:obj:`rddl2tf.compiler.Compiler`): RDDL2TensorFlow compiler.
        batch_size (int): The simulation batch size.
    '''

    def __init__(self, compiler: Compiler, batch_size: int = 1) -> None:
        self._compiler = compiler
        self._batch_size = batch_size

    @property
    def state_size(self) -> Sequence[Shape]:
        '''Returns the MDP state size.'''
        return self._compiler.rddl.state_size

    @property
    def action_size(self) -> Sequence[Shape]:
        '''Returns the MDP action size.'''
        return self._compiler.rddl.action_size

    @property
    def interm_size(self) -> Sequence[Shape]:
        '''Returns the MDP intermediate state size.'''
        return self._compiler.rddl.interm_size

    @property
    def output_size(self) -> Tuple[Sequence[Shape], Sequence[Shape], Sequence[Shape], int]:
        '''Returns the simulation cell output size.'''
        return (self.state_size, self.action_size, self.interm_size, 1)

    def __call__(self,
                 inputs: ActionTensor,
                 state: StateTensor,
                 scope: Optional[str] = None) -> Tuple[CellOutput, CellState]:
        '''Returns the transition simulation cell for the given `inputs` and `state`.

        The cell outputs the reward as a 1-dimensional tensor, and
        the next state as a tuple of tensors.

        Note:
            All tensors have shape: (batch_size, fluent_shape).

        Args:
            inputs (tf.Tensor): The current action.
            state (Sequence[tf.Tensor]): The current state.
            scope (Optional[str]): Operations' scope in computation graph.

        Returns:
            Tuple[CellOutput, CellState]: (output, next_state).
        '''
        action = inputs

        # Compile the CPFs under a scope binding non-fluents,
        # the current state, and the chosen action.
        transition_scope = self._compiler._scope.transition(
            self._compiler.non_fluents, state, action)
        interm_fluents, next_state_fluents = \
            self._compiler._compile_cpfs(transition_scope)

        # The reward may reference next-state fluents, so extend the
        # scope with them before compiling it.
        transition_scope.update(dict(next_state_fluents))
        reward_fluent = self._compiler._compile_reward(transition_scope)
        # Ensure the reward tensor has an explicit trailing axis.
        reward = self._output_size(reward_fluent.tensor)

        # Convert factored fluents into plain float32 tensor tuples.
        next_state = self._output(next_state_fluents)
        interm_state = self._output(interm_fluents)

        output = (next_state, action, interm_state, reward)
        return (output, next_state)

    @classmethod
    def _output_size(cls, tensor):
        '''Expands a rank-1 `tensor` to shape (batch_size, 1); higher ranks pass through.'''
        if tensor.shape.ndims != 1:
            return tensor
        return tf.expand_dims(tensor, -1)

    @classmethod
    def _output(cls, fluents: Sequence[FluentPair]) -> Sequence[tf.Tensor]:
        '''Converts `fluents` to tensors with datatype tf.float32.'''
        tensors = []
        for _, fluent in fluents:
            value = fluent.tensor
            if value.dtype != tf.float32:
                value = tf.cast(value, tf.float32)
            tensors.append(value)
        return tuple(tensors)