Source code for nnabla_rl.parametric_functions

# Copyright 2021 Sony Group Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Callable, Optional, Tuple

import numpy as np

import nnabla as nn
import nnabla.functions as NF
import nnabla_rl.functions as RF
from nnabla.initializer import ConstantInitializer
from nnabla.parameter import get_parameter_or_create
from nnabla_rl.initializers import HeUniform


def noisy_net(inp: nn.Variable,
              n_outmap: int,
              base_axis: int = 1,
              w_init: Optional[Callable[[Tuple[int, ...]], np.ndarray]] = None,
              b_init: Optional[Callable[[Tuple[int, ...]], np.ndarray]] = None,
              noisy_w_init: Optional[Callable[[Tuple[int, ...]], np.ndarray]] = None,
              noisy_b_init: Optional[Callable[[Tuple[int, ...]], np.ndarray]] = None,
              fix_parameters: bool = False,
              rng: Optional[np.random.RandomState] = None,
              with_bias: bool = True,
              with_noisy_bias: bool = True,
              apply_w: Optional[Callable[[nn.Variable], nn.Variable]] = None,
              apply_b: Optional[Callable[[nn.Variable], nn.Variable]] = None,
              apply_noisy_w: Optional[Callable[[nn.Variable], nn.Variable]] = None,
              apply_noisy_b: Optional[Callable[[nn.Variable], nn.Variable]] = None,
              seed: int = -1) -> nn.Variable:
    '''Noisy linear layer with factorized gaussian noise proposed by Fortunato et al.
    in the paper "Noisy networks for exploration".
    See: https://arxiv.org/abs/1706.10295 for details.

    Args:
        inp (nn.Variable): Input of the layer.
        n_outmap (int): Output dimension of the layer.
        base_axis (int): Axis of the input to treat as sample dimensions.
            Dimensions up to base_axis will be treated as sample dimensions. Defaults to 1.
        w_init (None or Callable[[Tuple[int, ...]], np.ndarray]): Initializer of weights
            used in the deterministic stream. Defaults to None. If None, will be initialized
            with the uniform distribution
            :math:`(-\\frac{1}{\\sqrt{fanin}}, \\frac{1}{\\sqrt{fanin}})`.
        b_init (None or Callable[[Tuple[int, ...]], np.ndarray]): Initializer of bias
            used in the deterministic stream. Defaults to None. If None, will be initialized
            with the uniform distribution
            :math:`(-\\frac{1}{\\sqrt{fanin}}, \\frac{1}{\\sqrt{fanin}})`.
        noisy_w_init (None or Callable[[Tuple[int, ...]], np.ndarray]): Initializer of weights
            used in the noisy stream. Defaults to None. If None, will be initialized to
            a constant value of :math:`\\frac{0.5}{\\sqrt{fanin}}`.
        noisy_b_init (None or Callable[[Tuple[int, ...]], np.ndarray]): Initializer of bias
            used in the noisy stream. Defaults to None. If None, will be initialized to
            a constant value of :math:`\\frac{0.5}{\\sqrt{fanin}}`.
        fix_parameters (bool): If True, the underlying weight and bias parameters will not be
            updated during training. Defaults to False.
        rng (None or np.random.RandomState): Random number generator for parameter
            initialization. Defaults to None.
        with_bias (bool): If True, the deterministic bias term is included in the computation.
            Defaults to True.
        with_noisy_bias (bool): If True, the noisy bias term is included in the computation.
            Defaults to True.
        apply_w (None or Callable[[nn.Variable], nn.Variable]): Callable object to apply
            to the weights on initialization. Defaults to None.
        apply_b (None or Callable[[nn.Variable], nn.Variable]): Callable object to apply
            to the bias on initialization. Defaults to None.
        apply_noisy_w (None or Callable[[nn.Variable], nn.Variable]): Callable object to apply
            to the noisy weights on initialization. Defaults to None.
        apply_noisy_b (None or Callable[[nn.Variable], nn.Variable]): Callable object to apply
            to the noisy bias on initialization. Defaults to None.
        seed (int): Random seed. If -1, the seed will be sampled from the global random
            number generator. Defaults to -1.
    Returns:
        nn.Variable: Linearly transformed input with noisy weights.
    '''
    inmaps = int(np.prod(inp.shape[base_axis:]))
    if w_init is None:
        w_init = HeUniform(inmaps, n_outmap, factor=1.0 / 3.0, rng=rng)
    if noisy_w_init is None:
        noisy_w_init = ConstantInitializer(0.5 / np.sqrt(inmaps))
    w = get_parameter_or_create("W", (inmaps, n_outmap), w_init, True, not fix_parameters)
    if apply_w is not None:
        w = apply_w(w)
    noisy_w = get_parameter_or_create("noisy_W", (inmaps, n_outmap), noisy_w_init, True, not fix_parameters)
    if apply_noisy_w is not None:
        noisy_w = apply_noisy_w(noisy_w)
    b = None
    if with_bias:
        if b_init is None:
            b_init = HeUniform(inmaps, n_outmap, factor=1.0 / 3.0, rng=rng)
        b = get_parameter_or_create("b", (n_outmap, ), b_init, True, not fix_parameters)
        if apply_b is not None:
            b = apply_b(b)
    noisy_b = None
    if with_noisy_bias:
        if noisy_b_init is None:
            noisy_b_init = ConstantInitializer(0.5 / np.sqrt(inmaps))
        noisy_b = get_parameter_or_create("noisy_b", (n_outmap, ), noisy_b_init, True, not fix_parameters)
        if apply_noisy_b is not None:
            noisy_b = apply_noisy_b(noisy_b)

    def _f(x):
        # Real-valued noise scaling: f(x) = sign(x) * sqrt(|x|)
        return NF.sign(x) * RF.sqrt(NF.abs(x))

    # Factorized gaussian noise: one noise vector per input unit (e_i) and one
    # per output unit (e_j), combined into a full noise matrix via outer product.
    e_i = _f(NF.randn(shape=(1, inmaps, 1), seed=seed))
    e_j = _f(NF.randn(shape=(1, 1, n_outmap), seed=seed))
    e_w = NF.reshape(NF.batch_matmul(e_i, e_j), shape=noisy_w.shape)
    e_w.need_grad = False
    noisy_w = noisy_w * e_w
    assert noisy_w.shape == w.shape
    if with_noisy_bias:
        assert isinstance(noisy_b, nn.Variable)
        e_b = NF.reshape(e_j, shape=noisy_b.shape)
        e_b.need_grad = False
        noisy_b = noisy_b * e_b
        assert noisy_b.shape == (n_outmap,)

    weight = w + noisy_w
    if with_bias and with_noisy_bias:
        assert isinstance(b, nn.Variable)
        assert isinstance(noisy_b, nn.Variable)
        bias = b + noisy_b
    elif with_bias:
        bias = b
    elif with_noisy_bias:
        bias = noisy_b
    else:
        bias = None
    return NF.affine(inp, weight, bias, base_axis)
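
A minimal usage sketch (not part of the library source), assuming only the module-level imports above. Since noisy_net registers its parameters under fixed names ("W", "noisy_W", "b", "noisy_b"), each call needs its own parameter scope; the scope names, helper name, and layer sizes below are illustrative assumptions.

def _noisy_q_head_example(obs: nn.Variable, n_actions: int) -> nn.Variable:
    # Hypothetical two-layer Q-network head; the noisy layers drive exploration
    with nn.parameter_scope("noisy1"):
        h = NF.relu(noisy_net(obs, 256))
    with nn.parameter_scope("noisy2"):
        return noisy_net(h, n_actions)

obs = nn.Variable((32, 64))  # batch of 32 observations, 64 features each
q_values = _noisy_q_head_example(obs, n_actions=4)
assert q_values.shape == (32, 4)
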
def spatial_softmax(inp: nn.Variable, alpha_init: float = 1., fix_alpha: bool = False) -> nn.Variable:
    r'''Spatial softmax layer proposed in https://arxiv.org/abs/1509.06113.

    Computes

    .. math::
        s_{cij} &= \frac{\exp(x_{cij} / \alpha)}{\sum_{i'j'} \exp(x_{ci'j'} / \alpha)} \\
        f_{cx} &= \sum_{ij} s_{cij}px_{ij}, \quad f_{cy} = \sum_{ij} s_{cij}py_{ij} \\
        y_{c} &= (f_{cx}, f_{cy})

    where :math:`x, y, \alpha` are the input, output and parameter respectively,
    and :math:`c, i, j` are the number of channels, heights and widths respectively.
    :math:`(px_{ij}, py_{ij})` is the image-space position of the point :math:`(i, j)`
    in the response map.

    Args:
        inp (nn.Variable): Input of the layer. Shape should be (batch_size, C, H, W).
        alpha_init (float): Initial temperature value. Defaults to 1.
        fix_alpha (bool): If True, the underlying alpha will not be updated during training.
            Defaults to False.

    Returns:
        nn.Variable: Feature points. Shape is (batch_size, C*2).
    '''
    assert len(inp.shape) == 4
    (batch_size, channel, height, width) = inp.shape
    alpha = get_parameter_or_create("alpha",
                                    shape=(1, 1),
                                    initializer=ConstantInitializer(alpha_init),
                                    need_grad=True,
                                    as_need_grad=not fix_alpha)
    features = NF.reshape(inp, (-1, height * width))
    softmax_attention = NF.softmax(features / alpha)
    # Image positions are normalized to the range [-1, 1].
    # This normalization follows the original Guided Policy Search implementation.
    # See: https://github.com/cbfinn/gps/blob/master/python/gps/algorithm/policy_opt/tf_model_example.py#L238
    pos_x, pos_y = np.meshgrid(np.linspace(-1., 1., height), np.linspace(-1., 1., width))
    pos_x = nn.Variable.from_numpy_array(pos_x.reshape(-1, height * width))
    pos_y = nn.Variable.from_numpy_array(pos_y.reshape(-1, height * width))
    expected_x = NF.sum(pos_x * softmax_attention, axis=1, keepdims=True)
    expected_y = NF.sum(pos_y * softmax_attention, axis=1, keepdims=True)
    expected_xy = NF.concatenate(expected_x, expected_y, axis=1)
    feature_points = NF.reshape(expected_xy, (batch_size, channel * 2))
    return feature_points
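
A minimal usage sketch (not part of the library source): extracting per-channel keypoints from a convolutional feature map, as in visuomotor policy networks. The nnabla.parametric_functions import, the scope names, and the tensor sizes are illustrative assumptions.

import nnabla.parametric_functions as NPF

image = nn.Variable((8, 3, 64, 64))  # (batch_size, C, H, W)
with nn.parameter_scope("conv1"):
    h = NF.relu(NPF.convolution(image, outmaps=16, kernel=(5, 5), stride=(2, 2)))
with nn.parameter_scope("keypoints"):
    feature_points = spatial_softmax(h)  # one normalized (x, y) pair per channel
assert feature_points.shape == (8, 16 * 2)
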