-
Notifications
You must be signed in to change notification settings - Fork 959
/
modulated_deformable_convolution.cc
91 lines (70 loc) · 3.57 KB
/
modulated_deformable_convolution.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/*!
* Copyright (c) 2018 Microsoft
* Licensed under The MIT License [see LICENSE for details]
* \file modulated_deformable_convolution.cc
* \brief
* \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
*/
#include "./modulated_deformable_convolution-inl.h"
namespace mxnet {
namespace op {
DMLC_REGISTER_PARAMETER(ModulatedDeformableConvolutionParam);
template<>
Operator* CreateOp<cpu>(ModulatedDeformableConvolutionParam param, int dtype,
std::vector<TShape> *in_shape,
std::vector<TShape> *out_shape,
Context ctx) {
Operator *op = NULL;
MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
op = new ModulatedDeformableConvolutionOp<cpu, DType>(param);
})
return op;
}
// DO_BIND_DISPATCH comes from operator_common.h
Operator *ModulatedDeformableConvolutionProp::CreateOperatorEx(Context ctx,
std::vector<TShape> *in_shape,
std::vector<int> *in_type) const {
std::vector<TShape> out_shape, aux_shape;
std::vector<int> out_type, aux_type;
CHECK(InferType(in_type, &out_type, &aux_type));
CHECK(InferShape(in_shape, &out_shape, &aux_shape));
DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], in_shape, &out_shape, ctx);
}
MXNET_REGISTER_OP_PROPERTY(_contrib_ModulatedDeformableConvolution, ModulatedDeformableConvolutionProp)
.describe(R"code(Compute 2-D modulated deformable convolution on 4-D input.
The modulated deformable convolution operation is described in https://arxiv.org/abs/1811.11168
For 2-D modulated deformable convolution, the shapes are
- **data**: *(batch_size, channel, height, width)*
- **offset**: *(batch_size, num_deformable_group * kernel[0] * kernel[1] * 2, height, width)*
- **mask**: *(batch_size, num_deformable_group * kernel[0] * kernel[1], height, width)*
- **weight**: *(num_filter, channel, kernel[0], kernel[1])*
- **bias**: *(num_filter,)*
- **out**: *(batch_size, num_filter, out_height, out_width)*.
Define::
f(x,k,p,s,d) = floor((x+2*p-d*(k-1)-1)/s)+1
then we have::
out_height=f(height, kernel[0], pad[0], stride[0], dilate[0])
out_width=f(width, kernel[1], pad[1], stride[1], dilate[1])
If ``no_bias`` is set to be true, then the ``bias`` term is ignored.
The default data ``layout`` is *NCHW*, namely *(batch_size, channle, height,
width)*.
If ``num_group`` is larger than 1, denoted by *g*, then split the input ``data``
evenly into *g* parts along the channel axis, and also evenly split ``weight``
along the first dimension. Next compute the convolution on the *i*-th part of
the data with the *i*-th weight part. The output is obtained by concating all
the *g* results.
If ``num_deformable_group`` is larger than 1, denoted by *dg*, then split the
input ``offset`` evenly into *dg* parts along the channel axis, and also evenly
split ``out`` evenly into *dg* parts along the channel axis. Next compute the
deformable convolution, apply the *i*-th part of the offset part on the *i*-th
out.
Both ``weight`` and ``bias`` are learnable parameters.
)code" ADD_FILELINE)
.add_argument("data", "NDArray-or-Symbol", "Input data to the ModulatedDeformableConvolutionOp.")
.add_argument("offset", "NDArray-or-Symbol", "Input offset to the ModulatedDeformableConvolutionOp.")
.add_argument("mask", "NDArray-or-Symbol", "Input mask to the ModulatedDeformableConvolutionOp.")
.add_argument("weight", "NDArray-or-Symbol", "Weight matrix.")
.add_argument("bias", "NDArray-or-Symbol", "Bias parameter.")
.add_arguments(ModulatedDeformableConvolutionParam::__FIELDS__());
} // namespace op
} // namespace mxnet