# Source code for nnabla_rl.models.q_function

# Copyright 2020,2021 Sony Corporation.
# Copyright 2021,2022,2023,2024 Sony Group Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import ABCMeta, abstractmethod

import nnabla as nn
import nnabla.functions as NF
import nnabla_rl.functions as RF
from nnabla_rl.models.model import Model


class QFunction(Model, metaclass=ABCMeta):
    """Base QFunction Class.

    Subclasses must implement ``q``. The remaining methods (``all_q``,
    ``max_q`` and ``argmax_q``) are not abstract: they raise
    ``NotImplementedError`` unless a subclass overrides them.
    """

    @abstractmethod
    def q(self, s: nn.Variable, a: nn.Variable) -> nn.Variable:
        """Compute Q-value for given state and action.

        Args:
            s (nn.Variable): state variable
            a (nn.Variable): action variable

        Returns:
            nn.Variable: Q-value for given state and action

        Raises:
            NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError

    def all_q(self, s: nn.Variable) -> nn.Variable:
        """Compute Q-values for each action for given state.

        Args:
            s (nn.Variable): state variable

        Returns:
            nn.Variable: Q-values for each action for given state

        Raises:
            NotImplementedError: if the subclass does not override this method
        """
        raise NotImplementedError

    def max_q(self, s: nn.Variable) -> nn.Variable:
        """Compute maximum Q-value for given state.

        Args:
            s (nn.Variable): state variable

        Returns:
            nn.Variable: maximum Q-value for given state

        Raises:
            NotImplementedError: if the subclass does not override this method
        """
        raise NotImplementedError

    def argmax_q(self, s: nn.Variable) -> nn.Variable:
        """Compute the action which maximizes the Q-value for given state.

        Args:
            s (nn.Variable): state variable

        Returns:
            nn.Variable: action which maximizes the Q-value for given state

        Raises:
            NotImplementedError: if the subclass does not override this method
        """
        raise NotImplementedError


class DiscreteQFunction(QFunction):
    """Base QFunction Class for discrete action environment.

    Subclasses only need to implement ``all_q``; ``q``, ``max_q`` and
    ``argmax_q`` are derived from its output, treating axis 1 of that output
    as the action axis.
    """

    @abstractmethod
    def all_q(self, s: nn.Variable) -> nn.Variable:
        """Compute Q-values for each action for given state.

        Args:
            s (nn.Variable): state variable

        Returns:
            nn.Variable: Q-values for each action for given state
        """
        raise NotImplementedError

    def q(self, s: nn.Variable, a: nn.Variable) -> nn.Variable:
        """Compute Q-value for given state and action.

        Args:
            s (nn.Variable): state variable
            a (nn.Variable): action variable (presumably integer action
                indices; it is reshaped to ``(-1, 1)`` before one-hot encoding)

        Returns:
            nn.Variable: Q-value of the given action, with axis 1 kept as a
                singleton dimension
        """
        all_values = self.all_q(s)
        num_actions = all_values.shape[1]
        action_column = NF.reshape(a, (-1, 1), inplace=False)
        action_mask = NF.one_hot(action_column, (num_actions,))
        # Masking and summing over the action axis leaves only the entry of `a`.
        return NF.sum(all_values * action_mask, axis=1, keepdims=True)

    def max_q(self, s: nn.Variable) -> nn.Variable:
        """Compute maximum Q-value for given state.

        Args:
            s (nn.Variable): state variable

        Returns:
            nn.Variable: maximum of ``all_q(s)`` along axis 1 (axis kept)
        """
        return NF.max(self.all_q(s), axis=1, keepdims=True)

    def argmax_q(self, s: nn.Variable) -> nn.Variable:
        """Compute the action which maximizes the Q-value for given state.

        Args:
            s (nn.Variable): state variable

        Returns:
            nn.Variable: argmax of ``all_q(s)`` along axis 1 (axis kept)
        """
        return RF.argmax(self.all_q(s), axis=1, keepdims=True)


class ContinuousQFunction(QFunction):
    """Base QFunction Class for continuous action environment.

    Declares no members of its own; everything is inherited from
    ``QFunction``.
    """


class FactoredContinuousQFunction(ContinuousQFunction):
    """Base FactoredContinuousQFunction Class for continuous action
    environment.

    In addition to the ``QFunction`` interface, subclasses must implement
    ``factored_q`` and the ``num_factors`` property.
    """

    @abstractmethod
    def factored_q(self, s: nn.Variable, a: nn.Variable) -> nn.Variable:
        """Compute factored Q-value for given state and action.

        Args:
            s (nn.Variable): state variable
            a (nn.Variable): action variable

        Returns:
            nn.Variable: factored Q-value for given state and action
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def num_factors(self) -> int:
        """Return the number of output dimensions.

        Presumably this is the number of factors produced by ``factored_q``;
        confirm against concrete implementations.

        Returns:
            int: output dimensions
        """
        raise NotImplementedError