DCNv2_op/modulated_deformable_convolution.cc

/*!
 * Copyright (c) 2018 Microsoft
 * Licensed under The MIT License [see LICENSE for details]
 * \file modulated_deformable_convolution.cc
 * \brief
 * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
*/

#include "./modulated_deformable_convolution-inl.h"

namespace mxnet {
namespace op {
DMLC_REGISTER_PARAMETER(ModulatedDeformableConvolutionParam);

template<>
Operator* CreateOp<cpu>(ModulatedDeformableConvolutionParam param, int dtype,
                        std::vector<TShape> *in_shape,
                        std::vector<TShape> *out_shape,
                        Context ctx) {
  Operator *op = NULL;
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    op = new ModulatedDeformableConvolutionOp<cpu, DType>(param);
  })
  return op;
}

// DO_BIND_DISPATCH comes from operator_common.h
Operator *ModulatedDeformableConvolutionProp::CreateOperatorEx(Context ctx,
                                            std::vector<TShape> *in_shape,
                                            std::vector<int> *in_type) const {
  std::vector<TShape> out_shape, aux_shape;
  std::vector<int> out_type, aux_type;
  CHECK(InferType(in_type, &out_type, &aux_type));
  CHECK(InferShape(in_shape, &out_shape, &aux_shape));
  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], in_shape, &out_shape, ctx);
}

MXNET_REGISTER_OP_PROPERTY(_contrib_ModulatedDeformableConvolution, ModulatedDeformableConvolutionProp)
.describe(R"code(Compute 2-D modulated deformable convolution on 4-D input.

The modulated deformable convolution operation is described in https://arxiv.org/abs/1811.11168

For 2-D modulated deformable convolution, the shapes are

- **data**: *(batch_size, channel, height, width)*
- **offset**: *(batch_size, num_deformable_group * kernel[0] * kernel[1] * 2, height, width)*
- **mask**: *(batch_size, num_deformable_group * kernel[0] * kernel[1], height, width)*
- **weight**: *(num_filter, channel, kernel[0], kernel[1])*
- **bias**: *(num_filter,)*
- **out**: *(batch_size, num_filter, out_height, out_width)*.

Define::

  f(x,k,p,s,d) = floor((x+2*p-d*(k-1)-1)/s)+1

then we have::

  out_height=f(height, kernel[0], pad[0], stride[0], dilate[0])
  out_width=f(width, kernel[1], pad[1], stride[1], dilate[1])

If ``no_bias`` is set to be true, then the ``bias`` term is ignored.

The default data ``layout`` is *NCHW*, namely *(batch_size, channle, height,
width)*.

If ``num_group`` is larger than 1, denoted by *g*, then split the input ``data``
evenly into *g* parts along the channel axis, and also evenly split ``weight``
along the first dimension. Next compute the convolution on the *i*-th part of
the data with the *i*-th weight part. The output is obtained by concating all
the *g* results.

If ``num_deformable_group`` is larger than 1, denoted by *dg*, then split the
input ``offset`` evenly into *dg* parts along the channel axis, and also evenly
split ``out`` evenly into *dg* parts along the channel axis. Next compute the
deformable convolution, apply the *i*-th part of the offset part on the *i*-th
out.


Both ``weight`` and ``bias`` are learnable parameters.


)code" ADD_FILELINE)
.add_argument("data", "NDArray-or-Symbol", "Input data to the ModulatedDeformableConvolutionOp.")
.add_argument("offset", "NDArray-or-Symbol", "Input offset to the ModulatedDeformableConvolutionOp.")
.add_argument("mask", "NDArray-or-Symbol", "Input mask to the ModulatedDeformableConvolutionOp.")
.add_argument("weight", "NDArray-or-Symbol", "Weight matrix.")
.add_argument("bias", "NDArray-or-Symbol", "Bias parameter.")
.add_arguments(ModulatedDeformableConvolutionParam::__FIELDS__());

}  // namespace op
}  // namespace mxnet