Add part of OpenCV (dnn layer implementation sources)

This commit is contained in:
Tang1705
2020-01-27 20:20:56 +08:00
parent 0c4ac1d8bb
commit a71fa47620
6518 changed files with 3122580 additions and 0 deletions

View File

@@ -0,0 +1,424 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Implementation of Batch Normalization layer.
*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/batch_norm.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Batch Normalization layer. The constructor folds the stored mean/variance
// (and the optional scale/shift blobs) into per-channel weights_/bias_
// vectors, so inference reduces to an affine transform per channel:
//   y = weights_[c] * x + bias_[c]
class BatchNormLayerImpl CV_FINAL : public BatchNormLayer
{
public:
    Mat weights_, bias_;          // folded per-channel scale and shift (1 x n, CV_32F)
    UMat umat_weight, umat_bias;  // lazily-created OpenCL copies of the above
    mutable int dims;             // rank of the last input seen by getMemoryShapes

    BatchNormLayerImpl(const LayerParams& params)
        : dims(-1)
    {
        setParamsFrom(params);
        CV_Assert(blobs.size() >= 2);  // blobs[0] = mean, blobs[1] = variance

        hasWeights = params.get<bool>("has_weight", false);
        hasBias = params.get<bool>("has_bias", false);
        useGlobalStats = params.get<bool>("use_global_stats", true);
        if(params.get<bool>("scale_bias", false))
            hasWeights = hasBias = true;
        epsilon = params.get<float>("eps", 1E-5);

        size_t n = blobs[0].total();
        CV_Assert(blobs[1].total() == n &&
                  blobs[0].isContinuous() && blobs[1].isContinuous() &&
                  blobs[0].type() == CV_32F && blobs[1].type() == CV_32F);

        // Caffe keeps a scale factor for the running statistics in blobs[2];
        // the stored mean/variance must be divided by it.
        float varMeanScale = 1.f;
        if (!hasWeights && !hasBias && blobs.size() > 2 && useGlobalStats) {
            CV_Assert(blobs.size() == 3); CV_CheckTypeEQ(blobs[2].type(), CV_32FC1, "");
            varMeanScale = blobs[2].at<float>(0);
            if (varMeanScale != 0)
                varMeanScale = 1/varMeanScale;
        }

        const int biasBlobIndex = blobs.size() - 1;
        const int weightsBlobIndex = biasBlobIndex - hasBias;
        if( hasWeights )
        {
            CV_Assert((size_t)weightsBlobIndex < blobs.size());
            const Mat& w = blobs[weightsBlobIndex];
            CV_Assert(w.isContinuous() && w.type() == CV_32F && w.total() == (size_t)n);
        }
        if( hasBias )
        {
            CV_Assert((size_t)biasBlobIndex < blobs.size());
            // Bug fix: validate the bias blob; previously this re-checked
            // blobs[weightsBlobIndex], leaving the bias blob unvalidated.
            const Mat& b = blobs[biasBlobIndex];
            CV_Assert(b.isContinuous() && b.type() == CV_32F && b.total() == (size_t)n);
        }

        const float* meanData = blobs[0].ptr<float>();
        const float* stdData = blobs[1].ptr<float>();
        const float* weightsData = hasWeights ? blobs[weightsBlobIndex].ptr<float>() : 0;
        const float* biasData = hasBias ? blobs[biasBlobIndex].ptr<float>() : 0;

        weights_.create(1, (int)n, CV_32F);
        bias_.create(1, (int)n, CV_32F);
        float* dstWeightsData = weights_.ptr<float>();
        float* dstBiasData = bias_.ptr<float>();

        // Fold normalization into a single multiply-add per channel:
        //   w = gamma / sqrt(var + eps),  b = beta - w * mean
        for (size_t i = 0; i < n; ++i)
        {
            float w = (hasWeights ? weightsData[i] : 1.0f) / sqrt(stdData[i] * varMeanScale + epsilon);
            dstWeightsData[i] = w;
            dstBiasData[i] = (hasBias ? biasData[i] : 0.0f) - w * meanData[i] * varMeanScale;
        }
    }

    void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE
    {
        scale = weights_;
        shift = bias_;
    }

    // Fold the scale/shift of a following layer into this one when its
    // parameters are per-channel or scalar; returns false otherwise.
    virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
    {
        Mat w, b;
        top->getScaleShift(w, b);
        if (w.empty() && b.empty())
            return false;

        const int numChannels = weights_.total();
        const int numFusedWeights = w.total();
        const int numFusedBias = b.total();

        if ((numFusedWeights != numChannels && numFusedWeights != 1 && !w.empty()) ||
            (numFusedBias != numChannels && numFusedBias != 1 && !b.empty()))
            return false;

        if (!w.empty())
        {
            w = w.reshape(1, 1);
            if (numFusedWeights == 1)
            {
                multiply(weights_, w.at<float>(0), weights_);
                multiply(bias_, w.at<float>(0), bias_);
            }
            else
            {
                multiply(weights_, w, weights_);
                multiply(bias_, w, bias_);
            }
        }
        if (!b.empty())
        {
            b = b.reshape(1, 1);
            if (numFusedBias == 1)
                add(bias_, b.at<float>(0), bias_);
            else
                add(bias_, b, bias_);  // b is already a 1xN row here
        }
        return true;
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        dims = inputs[0].size();
        if (!useGlobalStats && inputs[0][0] != 1)
            CV_Error(Error::StsNotImplemented, "Batch normalization in training mode with batch size > 1");
        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
        return true;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return (backendId == DNN_BACKEND_OPENCV) ||
               backendId == DNN_BACKEND_CUDA ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide()) ||
               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && (preferableTarget == DNN_TARGET_CPU || dims == 4));
    }

#ifdef HAVE_OPENCL
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        bool use_half = (inputs_.depth() == CV_16S);
        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);

        CV_Assert(blobs.size() >= 2);
        CV_Assert(inputs.size() == 1);

        // 2D fp16 inputs are not handled here; fall back to the generic path.
        if (use_half && inputs[0].dims == 2)
            return false;

        if (umat_weight.empty())
        {
            weights_.copyTo(umat_weight);
            bias_.copyTo(umat_bias);
        }

        UMat &inpBlob = inputs[0];
        int groups = inpBlob.size[0];
        int channels = inpBlob.size[1];
        int planeSize = 1;
        for (int i = 2; i < inpBlob.dims; i++) {  // int index: dims is int (was size_t)
            planeSize *= inpBlob.size[i];
        }

        String opts = (use_half) ? " -DDtype=half" : " -DDtype=float";
        for (size_t ii = 0; ii < outputs.size(); ii++)
        {
            if (inpBlob.dims == 2)
            {
                UMat& src = inputs[ii];
                UMat& dst = outputs[ii];
                multiply(src, weights_, dst);
                add(dst, bias_, dst);
            }
            else
            {
                MatShape s = shape(groups * channels, planeSize);
                UMat src = inputs[ii].reshape(1, s.size(), &s[0]);
                UMat dst = outputs[ii].reshape(1, s.size(), &s[0]);
                // Vector width 8/4/1 depending on plane-size divisibility.
                int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 4 : 1);
                String buildopt = format("-DNUM=%d", number) + opts;
                String kname = format("batch_norm%d", number);
                if (number == 1)
                    buildopt += format(" -Dconvert_T=convert_%s", use_half ? "half" : "float");
                else
                    buildopt += format(" -Dconvert_T=convert_%s%d", use_half ? "half" : "float", number);
                ocl::Kernel kernel(kname.c_str(), ocl::dnn::batchnorm_oclsrc, buildopt);
                if (kernel.empty())
                    return false;
                size_t global[] = { (size_t)s[0], (size_t)(s[1] / number) };
                kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
                kernel.set(1, (int)s[0]);
                kernel.set(2, (int)s[1]);
                kernel.set(3, (int)channels);
                kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_weight));
                kernel.set(5, ocl::KernelArg::PtrReadOnly(umat_bias));
                kernel.set(6, ocl::KernelArg::PtrWriteOnly(dst));
                bool ret = kernel.run(2, global, NULL, false);
                if (!ret)
                    return false;
            }
        }
        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        // fp16 tensors: convert to fp32, run, convert back.
        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        CV_Assert(blobs.size() >= 2);
        CV_Assert(inputs.size() == 1);

        Mat &inpBlob = inputs[0];
        int planeSize = 1;
        for (int i = 2; i < inpBlob.dims; i++) {  // int index: dims is int (was size_t)
            planeSize *= inpBlob.size[i];
        }

        for (size_t ii = 0; ii < outputs.size(); ii++)
        {
            Mat &outBlob = outputs[ii];

            for(int num = 0; num < outBlob.size[0]; num++)
            {
                for (int n = 0; n < outBlob.size[1]; n++)
                {
                    float w = weights_.at<float>(n);
                    float b = bias_.at<float>(n);
                    Mat inpBlobPlane(1, planeSize, CV_32F, inpBlob.ptr<float>(num, n));
                    Mat outBlobPlane(1, planeSize, CV_32F, outBlob.ptr<float>(num, n));
                    // convertTo applies the affine transform alpha*x + beta.
                    inpBlobPlane.convertTo(outBlobPlane, CV_32F, w, b);
                }
            }
        }
    }

    void forwardSlice(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE
    {
        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
        {
            int i = 0;
            float w = weights_.at<float>(cn);
            float b = bias_.at<float>(cn);
#if CV_SIMD128
            // Process 16 floats per iteration using 4 SIMD registers.
            v_float32x4 wV = v_setall_f32(w), bV = v_setall_f32(b);
            for( ; i <= len - 16; i += 16 )
            {
                v_float32x4 x0 = v_load(srcptr + i);
                v_float32x4 x1 = v_load(srcptr + i + 4);
                v_float32x4 x2 = v_load(srcptr + i + 8);
                v_float32x4 x3 = v_load(srcptr + i + 12);
                x0 = v_muladd(x0, wV, bV);
                x1 = v_muladd(x1, wV, bV);
                x2 = v_muladd(x2, wV, bV);
                x3 = v_muladd(x3, wV, bV);
                v_store(dstptr + i, x0);
                v_store(dstptr + i + 4, x1);
                v_store(dstptr + i + 8, x2);
                v_store(dstptr + i + 12, x3);
            }
#endif
            // Scalar tail (and the whole loop when SIMD is unavailable).
            for( ; i < len; i++ )
                dstptr[i] = w * srcptr[i] + b;
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        return make_cuda_node<cuda4dnn::BatchNormOp>(preferableTarget, std::move(context->stream), weights_, bias_);
    }
#endif

    virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
    {
        switch (node->backendId)
        {
            case DNN_BACKEND_HALIDE:
            {
#ifdef HAVE_HALIDE
                auto base = node.dynamicCast<HalideBackendNode>();
                Halide::Func& input = base->funcs.back();
                Halide::Var x("x"), y("y"), c("c"), n("n");
                Halide::Func top = attachHalide(input(x, y, c, n));
                return Ptr<BackendNode>(new HalideBackendNode(base, top));
#endif  // HAVE_HALIDE
                break;
            }
        }
        return Ptr<BackendNode>();
    }

    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        Halide::Buffer<float> input = halideBuffer(inputs[0]);
        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = attachHalide(input(x, y, c, n));
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif  // HAVE_HALIDE
        return Ptr<BackendNode>();
    }

#ifdef HAVE_HALIDE
    // attachHalide can work both with Halide::Buffer and Halide::Func. In the
    // second case it will be a fusion.
    Halide::Func attachHalide(const Halide::Expr& input)
    {
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        Halide::Var x("x"), y("y"), c("c"), n("n");

        const int numChannels = weights_.total();
        auto weights = wrapToHalideBuffer(weights_, {numChannels});
        auto bias = wrapToHalideBuffer(bias_, {numChannels});
        top(x, y, c, n) = input * weights(c) + bias(c);
        return top;
    }
#endif  // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name);
        const size_t numChannels = weights_.total();
        addConstantData("weights", wrapToInfEngineBlob(weights_, {numChannels}, InferenceEngine::Layout::C), ieLayer);
        addConstantData("biases", wrapToInfEngineBlob(bias_, {numChannels}, InferenceEngine::Layout::C), ieLayer);
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        // Broadcastable constant shape: 1 x C x 1 x ... x 1
        std::vector<size_t> shape(ieInpNode->get_shape().size(), 1);
        shape[1] = weights_.total();
        auto weight = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), weights_.data);
        auto bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), bias_.data);
        auto scale_node = std::make_shared<ngraph::op::v1::Multiply>(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY);
        auto scale_shift = std::make_shared<ngraph::op::v1::Add>(scale_node, bias, ngraph::op::AutoBroadcastType::NUMPY);
        return Ptr<BackendNode>(new InfEngineNgraphNode(scale_shift));
    }
#endif  // HAVE_DNN_NGRAPH

    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
        CV_UNUSED(outputs); // suppress unused variable warning

        // Roughly one multiply, one add, one store per element.
        int64 flops = 0;
        for(size_t i = 0; i < inputs.size(); i++)
        {
            flops += 3*total(inputs[i]);
        }
        return flops;
    }

private:
    bool useGlobalStats;
};
// Factory: builds the concrete batch-norm implementation from layer params.
Ptr<BatchNormLayer> BatchNormLayer::create(const LayerParams& params)
{
    Ptr<BatchNormLayer> layer(new BatchNormLayerImpl(params));
    return layer;
}
} // namespace dnn
} // namespace cv

View File

@@ -0,0 +1,190 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/reshape.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Identity ("blank") layer: copies its inputs to outputs unchanged.
// Used e.g. for Dropout at inference time.
class BlankLayerImpl CV_FINAL : public BlankLayer
{
public:
    BlankLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
    }

    // Runs on CPU/OpenCL, CUDA, and both Inference Engine backends.
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine());
    }

    // Output shapes mirror input shapes (delegated to the base class).
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
        return true;
    }

#ifdef HAVE_OPENCL
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);

        for (int i = 0, n = outputs.size(); i < n; ++i)
        {
            // Compare device handles to skip the copy when input and output
            // share the same buffer (in-place execution).
            void *src_handle = inputs[i].handle(ACCESS_READ);
            void *dst_handle = outputs[i].handle(ACCESS_WRITE);
            if (src_handle != dst_handle)
                inputs[i].copyTo(outputs[i]);
        }
        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        // Copy only when input/output do not already alias the same data.
        for (int i = 0, n = outputs.size(); i < n; ++i)
            if (outputs[i].data != inputs[i].data)
                inputs[i].copyTo(outputs[i]);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        // Identity is expressed as a no-op reshape on the CUDA backend.
        return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
    }
#endif

#ifdef HAVE_INF_ENGINE
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
        std::vector<size_t> dims = input->getDims();
        CV_Assert(!dims.empty());

        InferenceEngine::Builder::Layer ieLayer(name);
        ieLayer.setName(name);
        // Myriad uses a Copy layer; other targets emulate identity with a
        // single-output Split.
        if (preferableTarget == DNN_TARGET_MYRIAD)
        {
            ieLayer.setType("Copy");
        }
        else
        {
            ieLayer.setType("Split");
            ieLayer.getParameters()["axis"] = dims.size() - 1;
            ieLayer.getParameters()["out_sizes"] = dims[0];
        }
        ieLayer.setInputPorts({InferenceEngine::Port(dims)});
        ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        // Identity via a single-input Concat node.
        ngraph::NodeVector inp{ieInpNode};
        auto blank = std::make_shared<ngraph::op::Concat>(inp, 0);
        return Ptr<BackendNode>(new InfEngineNgraphNode(blank));
    }
#endif  // HAVE_DNN_NGRAPH
};
// Factory. Caffe's Faster-RCNN fork
// (https://github.com/rbgirshick/caffe-fast-rcnn/tree/faster-rcnn) marks its
// Dropout layers with scale_train=false; emulate those with a Power layer
// that rescales activations at test time. Everything else is an identity.
Ptr<Layer> BlankLayer::create(const LayerParams& params)
{
    const bool scaleTrain = params.get<bool>("scale_train", true);
    if (scaleTrain)
        return Ptr<BlankLayer>(new BlankLayerImpl(params));

    float scale = 1 - params.get<float>("dropout_ratio", 0.5f);
    CV_Assert(scale > 0);

    LayerParams powerParams;
    powerParams.name = params.name;
    powerParams.type = "Power";
    powerParams.set("scale", scale);
    return PowerLayer::create(powerParams);
}
}
}

View File

@@ -0,0 +1,373 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include "../op_vkcom.hpp"
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/concat.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Concatenation layer: joins inputs along a configurable axis. With
// padding=true, non-concat dimensions may differ and smaller inputs are
// centered inside a zero-filled output.
class ConcatLayerImpl CV_FINAL : public ConcatLayer
{
public:
    ConcatLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        axis = params.get<int>("axis", 1);
        padding = params.get<bool>("padding", false);
    }

    // Output shape: max (padding) or equal (no padding) over non-concat axes,
    // sum of input sizes along the concat axis.
    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                 const int requiredOutputs,
                                 std::vector<MatShape> &outputs,
                                 std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() > 0);
        outputs.resize(1, inputs[0]);
        int cAxis = clamp(axis, inputs[0]);  // resolve negative axis

        int axisSum = 0;
        for (size_t i = 0; i < inputs.size(); i++)
        {
            MatShape curShape = inputs[i];

            if (padding)
            {
                // Output is the elementwise maximum of all input shapes.
                for (int curAxis = 0; curAxis < outputs[0].size(); curAxis++)
                {
                    outputs[0][curAxis] = std::max(outputs[0][curAxis], curShape[curAxis]);
                }
            }
            else
            {
                // All non-concat dimensions must match exactly.
                CV_Assert(curShape.size() == outputs[0].size());
                for (int curAxis = 0; curAxis < outputs[0].size(); curAxis++)
                {
                    if (curAxis != cAxis && outputs[0][curAxis] != curShape[curAxis])
                        CV_Error(Error::StsBadSize, "Inconsistent shape for ConcatLayer");
                }
            }

            axisSum += curShape[cAxis];
        }

        outputs[0][cAxis] = axisSum;
        return false;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !padding) ||  // By channels
               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && !padding) ||
               (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !padding);
    }

    // Parallel CPU path for the common case: 4D tensors concatenated along
    // the channel axis. Builds a flat table of per-(batch,channel) source
    // plane pointers, then copies planes into the output in stripes.
    class ChannelConcatInvoker : public ParallelLoopBody
    {
    public:
        std::vector<Mat>* inputs;           // non-owning; set by run()
        Mat* output;                        // non-owning; set by run()
        int nstripes;
        std::vector<const float*> chptrs;   // [batch x channel] -> source plane

        static void run(std::vector<Mat>& inputs, Mat& output, int nstripes)
        {
            ChannelConcatInvoker cc;
            cc.inputs = &inputs;
            cc.output = &output;
            cc.nstripes = nstripes;

            size_t i, ninputs = inputs.size();
            int nchannels = 0, batchsz = output.size[0];
            for( i = 0; i < ninputs; i++ )
            {
                Mat& inp = inputs[i];
                // Inputs must be continuous 4D and match the output in every
                // dimension except channels.
                CV_Assert( inp.isContinuous() && (inp.type() == CV_32F || inp.type() == CV_16S) &&
                           inp.dims == 4 && inp.size[0] == output.size[0] &&
                           inp.size[2] == output.size[2] &&
                           inp.size[3] == output.size[3] );
                nchannels += inp.size[1];
            }
            CV_Assert( nchannels == output.size[1] );
            CV_Assert( output.isContinuous() && (output.type() == CV_32F || output.type() == CV_16S) );

            cc.chptrs.resize(nchannels*batchsz);

            // Fill the pointer table; ofs is the running channel offset of
            // the current input inside the concatenated output.
            int ofs = 0;
            for( i = 0; i < ninputs; i++)
            {
                Mat& inp = inputs[i];
                for( int j = 0; j < batchsz; j++ )
                    for( int k = 0; k < inp.size[1]; k++ )
                    {
                        const float* ptr = inp.ptr<float>(j, k);
                        cc.chptrs[ofs + j*nchannels + k] = ptr;
                    }
                ofs += inp.size[1];
            }

            parallel_for_(Range(0, nstripes), cc, nstripes);
        }

        ChannelConcatInvoker() : inputs(0), output(0), nstripes(0) {}

        void operator()(const Range& r) const CV_OVERRIDE
        {
            // The output is treated as a flat sequence of planes; each stripe
            // copies a contiguous slice of that sequence.
            size_t planeSize = (size_t)output->size[2]*output->size[3];
            size_t nch = chptrs.size();
            size_t total = nch*planeSize;
            size_t stripeSize = (total + nstripes - 1)/nstripes;
            size_t stripeStart = r.start*stripeSize;
            size_t stripeEnd = std::min(total, r.end*stripeSize);
            const float** ptrs = (const float**)&chptrs[0];
            float* outptr = output->ptr<float>();
            size_t blockSize0 = 1 << 16;  // copy in <=64K-element chunks

            for( size_t ofs0 = stripeStart; ofs0 < stripeEnd; )
            {
                size_t ch = ofs0/planeSize;          // which source plane
                size_t ofs = ofs0 - ch*planeSize;    // offset within the plane
                size_t blockSize = std::min(blockSize0, planeSize - ofs);
                memcpy(outptr + ofs0, ptrs[ch] + ofs, blockSize*sizeof(outptr[0]));
                ofs0 += blockSize;
            }
        }
    };

#ifdef HAVE_OPENCL
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        bool use_half = (inps.depth() == CV_16S);
        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        int cAxis = clamp(axis, inputs[0].dims);
        if (padding)
            return false;  // padded concat is not implemented in OpenCL

        int bottom_concat_axis;
        int concat_size = total(shape(inputs[0]), cAxis + 1);   // elems after the axis
        int top_concat_axis = outputs[0].size[cAxis];
        int num_concats = total(shape(inputs[0]), 0, cAxis);    // elems before the axis
        int offset_concat_axis = 0;                             // running output offset
        UMat& outMat = outputs[0];
        String buildopt = format(" -DDtype=%s", (use_half) ? "half" : "float");
        String kname = format("concat_%s", use_half ? "half" : "float");

        // One kernel launch per input, each writing its slice of the output.
        for (size_t i = 0; i < inputs.size(); i++)
        {
            ocl::Kernel kernel(kname.c_str(), ocl::dnn::concat_oclsrc, buildopt);
            if (kernel.empty())
                return false;

            UMat& inpMat = inputs[i];
            bottom_concat_axis = inputs[i].size[cAxis];
            size_t nthreads = inputs[i].total();

            kernel.set(0, (int)nthreads);
            kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat));
            kernel.set(2, (int)num_concats);
            kernel.set(3, (int)concat_size);
            kernel.set(4, (int)top_concat_axis);
            kernel.set(5, (int)bottom_concat_axis);
            kernel.set(6, (int)offset_concat_axis);
            kernel.set(7, ocl::KernelArg::PtrWriteOnly(outMat));

            if (!kernel.run(1, &nthreads, NULL, false))
                return false;

            offset_concat_axis += bottom_concat_axis;
        }

        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        int cAxis = clamp(axis, inputs[0].dims);
        Mat& outMat = outputs[0];

        if (padding)
            outMat.setTo(0);  // zero-fill so uncovered regions stay zero

        if( cAxis == 1 && outMat.dims == 4 && !padding)
        {
            // Fast path: channel-wise concat of 4D tensors, parallelized.
            int nstripes = getNumThreads();
            ChannelConcatInvoker::run(inputs, outMat, nstripes);
        }
        else
        {
            // Generic path: copy each input into its ROI of the output.
            // With padding, non-concat dimensions are centered.
            std::vector<Range> ranges(outputs[0].dims, Range::all());

            ranges[cAxis].start = 0;
            for (size_t i = 0; i < inputs.size(); i++)
            {
                ranges[cAxis].end = ranges[cAxis].start + inputs[i].size[cAxis];
                for (int j = 0; j < outMat.dims; ++j)
                {
                    if (j == cAxis) continue;
                    ranges[j].start = (outMat.size[j] - inputs[i].size[j]) / 2;
                    ranges[j].end = ranges[j].start + inputs[i].size[j];
                }
                inputs[i].copyTo(outMat(&ranges[0]));
                ranges[cAxis].start = ranges[cAxis].end;
            }
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
        auto concat_axis = clamp(axis, input_wrapper->getRank());
        return make_cuda_node<cuda4dnn::ConcatOp>(preferableTarget, std::move(context->stream), concat_axis, padding);
    }
#endif

    virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
    {
#ifdef HAVE_VULKAN
        vkcom::Tensor in = VkComTensor(input[0]);
        int cAxis = clamp(axis, in.dimNum());
        std::shared_ptr<vkcom::OpBase> op(new vkcom::OpConcat(cAxis));
        return Ptr<BackendNode>(new VkComBackendNode(input, op));
#endif  // HAVE_VULKAN
        return Ptr<BackendNode>();
    }

    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        std::vector<Halide::Buffer<> > inputBuffers = halideBuffers(input);

        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        // Build a nested select() over the channel index: each input owns the
        // channel range [previous offset, offset).
        int offset = inputBuffers[0].channels();
        Halide::Expr topExpr = select(c < offset,
                                      inputBuffers[0](x, y, c, n),
                                      inputBuffers[1](x, y, c - offset, n));
        for (int i = 2; i < input.size(); ++i)
        {
            offset += inputBuffers[i - 1].channels();
            topExpr = select(c < offset, topExpr,
                             inputBuffers[i](x, y, c - offset, n));
        }
        top(x, y, c, n) = topExpr;
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif  // HAVE_HALIDE
        return Ptr<BackendNode>();
    }

#ifdef HAVE_INF_ENGINE
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);

        InferenceEngine::Builder::ConcatLayer ieLayer(name);
        ieLayer.setAxis(clamp(axis, input->getDims().size()));
        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        CV_Assert(inputs.size() == nodes.size());
        ngraph::NodeVector inp_nodes;
        for (auto& node : nodes) {
            inp_nodes.push_back(node.dynamicCast<InfEngineNgraphNode>()->node);
        }

        InferenceEngine::DataPtr data = ngraphDataNode(inputs[0]);
        auto concat = std::make_shared<ngraph::op::Concat>(inp_nodes, clamp(axis, data->getDims().size()));
        return Ptr<BackendNode>(new InfEngineNgraphNode(concat));
    }
#endif  // HAVE_DNN_NGRAPH
};
// Factory: builds the concrete concat implementation from layer params.
Ptr<ConcatLayer> ConcatLayer::create(const LayerParams& params)
{
    Ptr<ConcatLayer> layer(new ConcatLayerImpl(params));
    return layer;
}
}
}

View File

@@ -0,0 +1,106 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "../op_inf_engine.hpp"
#include "../op_cuda.hpp"
#include "layers_common.hpp"
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/const.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv { namespace dnn {
// Constant layer: takes no inputs and always emits its single stored blob.
class ConstLayerImpl CV_FINAL : public ConstLayer
{
public:
    ConstLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        CV_Assert(blobs.size() == 1);  // exactly one constant tensor
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
               backendId == DNN_BACKEND_CUDA;
    }

    // No inputs allowed; the single output has the stored blob's shape.
    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                 const int requiredOutputs,
                                 std::vector<MatShape> &outputs,
                                 std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.empty());
        outputs.assign(1, shape(blobs[0]));
        return false;
    }

#ifdef HAVE_OPENCL
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> outputs;
        outs.getUMatVector(outputs);
        // fp16 targets get a converted copy; otherwise copy as-is.
        if (outs.depth() == CV_16S)
            convertFp16(blobs[0], outputs[0]);
        else
            blobs[0].copyTo(outputs[0]);
        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        std::vector<Mat> outputs;
        outputs_arr.getMatVector(outputs);
        blobs[0].copyTo(outputs[0]);
    }

#ifdef HAVE_INF_ENGINE
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        InferenceEngine::Builder::ConstLayer ieLayer(name);
        ieLayer.setData(wrapToInfEngineBlob(blobs[0]));
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        CV_Assert(blobs.size() == 1);
        return make_cuda_node<cuda4dnn::ConstOp>(preferableTarget, std::move(context->stream), blobs[0]);
    }
#endif
};
Ptr<Layer> ConstLayer::create(const LayerParams& params)
{
    // Factory entry point: build the implementation and hand it back as a Layer.
    Ptr<Layer> layer(new ConstLayerImpl(params));
    return layer;
}
}} // namespace cv::dnn

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,146 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "layers_common.hpp"
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/crop_and_resize.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv { namespace dnn {
// Crops box-shaped regions out of a feature map and bilinearly resizes each
// crop to a fixed (outHeight x outWidth) grid, one output slice per box.
class CropAndResizeLayerImpl CV_FINAL : public CropAndResizeLayer
{
public:
    CropAndResizeLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        CV_Assert_N(params.has("width"), params.has("height"));
        // NOTE(review): values are read as float and truncated into int members;
        // presumably exporters store them as floats -- confirm before switching
        // to get<int>.
        outWidth = params.get<float>("width");
        outHeight = params.get<float>("height");
    }
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        // inputs[0]: NCHW feature map (batch must be 1), inputs[1]: boxes.
        CV_Assert_N(inputs.size() == 2, inputs[0].size() == 4);
        if (inputs[0][0] != 1)
            CV_Error(Error::StsNotImplemented, "");
        outputs.resize(1, MatShape(4));
        outputs[0][0] = inputs[1][2]; // Number of bounding boxes.
        outputs[0][1] = inputs[0][1]; // Number of channels.
        outputs[0][2] = outHeight;
        outputs[0][3] = outWidth;
        return false;
    }
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
        // fp16 tensors are handled by the generic fallback path.
        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }
        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        Mat& inp = inputs[0];
        Mat& out = outputs[0];
        // Boxes arrive as rows of 7 floats; columns 3..6 are left/top/right/bottom
        // (the scaling by (inpHeight-1)/(inpWidth-1) below implies coordinates
        // normalized to [0, 1] -- confirm against the producer layer).
        Mat boxes = inputs[1].reshape(1, inputs[1].total() / 7);
        const int numChannels = inp.size[1];
        const int inpHeight = inp.size[2];
        const int inpWidth = inp.size[3];
        const int inpSpatialSize = inpHeight * inpWidth;
        const int outSpatialSize = outHeight * outWidth;
        CV_Assert_N(inp.isContinuous(), out.isContinuous());
        for (int b = 0; b < boxes.rows; ++b)
        {
            float* outDataBox = out.ptr<float>(b);
            float left = boxes.at<float>(b, 3);
            float top = boxes.at<float>(b, 4);
            float right = boxes.at<float>(b, 5);
            float bottom = boxes.at<float>(b, 6);
            float boxWidth = right - left;
            float boxHeight = bottom - top;
            // Step (in input pixels) between adjacent output samples.
            float heightScale = boxHeight * static_cast<float>(inpHeight - 1) / (outHeight - 1);
            float widthScale = boxWidth * static_cast<float>(inpWidth - 1) / (outWidth - 1);
            for (int y = 0; y < outHeight; ++y)
            {
                // Source row (fractional); y0/y0+1 bracket it for interpolation.
                float input_y = top * (inpHeight - 1) + y * heightScale;
                int y0 = static_cast<int>(input_y);
                const float* inpData_row0 = inp.ptr<float>(0, 0, y0);
                // Clamp to the last row when y0 is at the bottom edge.
                const float* inpData_row1 = (y0 + 1 < inpHeight) ? (inpData_row0 + inpWidth) : inpData_row0;
                for (int x = 0; x < outWidth; ++x)
                {
                    float input_x = left * (inpWidth - 1) + x * widthScale;
                    int x0 = static_cast<int>(input_x);
                    int x1 = std::min(x0 + 1, inpWidth - 1); // clamp right neighbour
                    float* outData = outDataBox + y * outWidth + x;
                    const float* inpData_row0_c = inpData_row0;
                    const float* inpData_row1_c = inpData_row1;
                    // Bilinear blend of the 4 neighbours, walking channel planes
                    // via the per-channel strides (inpSpatialSize/outSpatialSize).
                    for (int c = 0; c < numChannels; ++c)
                    {
                        *outData = inpData_row0_c[x0] +
                            (input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
                            (input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
                            (input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0]));
                        inpData_row0_c += inpSpatialSize;
                        inpData_row1_c += inpSpatialSize;
                        outData += outSpatialSize;
                    }
                }
            }
        }
        // Fewer boxes than output slots: fill the unused tail with the value a
        // zero-area box at the origin would produce.
        if (boxes.rows < out.size[0])
        {
            // left = top = right = bottom = 0
            std::vector<cv::Range> dstRanges(4, Range::all());
            dstRanges[0] = Range(boxes.rows, out.size[0]);
            out(dstRanges).setTo(inp.ptr<float>(0, 0, 0)[0]);
        }
    }
#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        return make_cuda_node<cuda4dnn::CropAndResizeOp>(preferableTarget, std::move(context->stream));
    }
#endif
private:
    int outWidth, outHeight;  // fixed size of every resized crop
};
Ptr<Layer> CropAndResizeLayer::create(const LayerParams& params)
{
    // Construct the implementation and transfer ownership to the caller.
    Ptr<CropAndResizeLayer> layer(new CropAndResizeLayerImpl(params));
    return layer;
}
} // namespace dnn
} // namespace cv

View File

@@ -0,0 +1,998 @@
/*M ///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_inf_engine.hpp"
#include <float.h>
#include <string>
#include "../nms.inl.hpp"
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_DNN_NGRAPH
#include "../ie_ngraph.hpp"
#include <ngraph/op/experimental/layers/detection_output.hpp>
#endif
namespace cv
{
namespace dnn
{
namespace util
{
// Axis-aligned box given by its corner coordinates, with an optional cached
// area ("size") that is valid only while has_size() reports true.
class NormalizedBBox
{
public:
    float xmin, ymin, xmax, ymax;

    NormalizedBBox() : xmin(0.f), ymin(0.f), xmax(0.f), ymax(0.f)
    {
        clear_size();
    }

    // Cached area; callers must check has_size() first.
    float size() const { return size_; }
    bool has_size() const { return has_size_; }

    void set_size(float value)
    {
        has_size_ = true;
        size_ = value;
    }

    void clear_size()
    {
        has_size_ = false;
        size_ = 0.f;
    }

private:
    bool has_size_;
    float size_;
};
// Strict-weak ordering that ranks (score, payload) pairs by score,
// highest score first; the payload never participates in the comparison.
template <typename T>
static inline bool SortScorePairDescend(const std::pair<float, T>& pair1,
                                        const std::pair<float, T>& pair2)
{
    return pair2.first < pair1.first;
}
static inline float caffe_box_overlap(const util::NormalizedBBox& a, const util::NormalizedBBox& b);
static inline float caffe_norm_box_overlap(const util::NormalizedBBox& a, const util::NormalizedBBox& b);
} // namespace
class DetectionOutputLayerImpl CV_FINAL : public DetectionOutputLayer
{
public:
unsigned _numClasses;           // number of classification labels in the confidence input
bool _shareLocation;            // true: one set of box predictions is shared by all classes
int _numLocClasses;             // 1 when _shareLocation, otherwise _numClasses (see ctor)
int _backgroundLabelId;         // label skipped when collecting detections
cv::String _codeType;           // bbox encoding: "CORNER" or "CENTER_SIZE" (see getCodeType)
bool _varianceEncodedInTarget;  // true: predictions already include the prior variance
int _keepTopK;                  // max detections kept per image after NMS; -1 disables the cap
float _confidenceThreshold;     // minimum score forwarded into NMS
float _nmsThreshold;            // overlap threshold passed to NMSFast_
int _topK;                      // per-class candidate cap forwarded to NMSFast_ (-1 presumably unlimited; see nms.inl.hpp)
// Whenever predicted bounding boxes are represented in YXHW instead of XYWH layout.
bool _locPredTransposed;
// It's true whenever predicted bounding boxes and proposals are normalized to [0, 1].
bool _bboxesNormalized;
bool _clip;                     // clip decoded boxes to the image/clip bounds
bool _groupByClasses;           // true: keep output rows grouped per class instead of globally score-sorted
enum { _numAxes = 4 };
static const std::string _layerName;  // used only to compose error messages
// Maps a class label (-1 when locations are shared) to its decoded boxes.
typedef std::map<int, std::vector<util::NormalizedBBox> > LabelBBox;
// Copy parameter `parameterName` into `result`; returns false when the
// parameter is absent from `params`.
bool getParameterDict(const LayerParams &params,
                      const std::string &parameterName,
                      DictValue& result)
{
    if (!params.has(parameterName))
        return false;
    result = params.get(parameterName);
    return true;
}
// Typed parameter lookup. A missing required parameter raises StsBadArg;
// a missing optional one yields `defaultValue`.
template<typename T>
T getParameter(const LayerParams &params,
               const std::string &parameterName,
               const size_t &idx=0,
               const bool required=true,
               const T& defaultValue=T())
{
    DictValue dictValue;
    const bool found = getParameterDict(params, parameterName, dictValue);
    if (!found)
    {
        if (!required)
            return defaultValue;
        const std::string message = _layerName + " layer parameter does not contain " +
                                    parameterName + " parameter.";
        CV_Error(Error::StsBadArg, message);
    }
    return dictValue.get<T>(idx);
}
void getCodeType(const LayerParams &params)
{
String codeTypeString = toLowerCase(params.get<String>("code_type"));
if (codeTypeString == "center_size")
_codeType = "CENTER_SIZE";
else
_codeType = "CORNER";
}
// Reads the layer configuration. num_classes, share_location, keep_top_k and
// nms_threshold are required; every other parameter falls back to a default.
DetectionOutputLayerImpl(const LayerParams &params)
{
    _numClasses = getParameter<unsigned>(params, "num_classes");
    _shareLocation = getParameter<bool>(params, "share_location");
    _numLocClasses = _shareLocation ? 1 : _numClasses;
    _backgroundLabelId = getParameter<int>(params, "background_label_id");
    _varianceEncodedInTarget = getParameter<bool>(params, "variance_encoded_in_target", 0, false, false);
    _keepTopK = getParameter<int>(params, "keep_top_k");
    _confidenceThreshold = getParameter<float>(params, "confidence_threshold", 0, false, 0);
    _topK = getParameter<int>(params, "top_k", 0, false, -1);
    _locPredTransposed = getParameter<bool>(params, "loc_pred_transposed", 0, false, false);
    _bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);
    _clip = getParameter<bool>(params, "clip", 0, false, false);
    _groupByClasses = getParameter<bool>(params, "group_by_classes", 0, false, true);
    getCodeType(params);
    // Parameters used in nms.
    _nmsThreshold = getParameter<float>(params, "nms_threshold");
    CV_Assert(_nmsThreshold > 0.);
    setParamsFrom(params);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
    if (backendId == DNN_BACKEND_OPENCV)
        return true;
    // The Inference Engine paths only handle non-transposed, normalized boxes.
    const bool isInfEngine =
        backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
        backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
    return isInfEngine && !_locPredTransposed && _bboxesNormalized;
}
// Validates the three inputs (0: location predictions, 1: confidences,
// 2: prior boxes) and reserves a worst-case output of keep_top_k * batch rows.
bool getMemoryShapes(const std::vector<MatShape> &inputs,
                     const int requiredOutputs,
                     std::vector<MatShape> &outputs,
                     std::vector<MatShape> &internals) const CV_OVERRIDE
{
    const int num = inputs[0][0];
    CV_Assert(inputs.size() >= 3);
    CV_Assert(num == inputs[1][0]);
    // Each prior contributes 4 coordinates along the third prior axis.
    int numPriors = inputs[2][2] / 4;
    CV_Assert((numPriors * _numLocClasses * 4) == total(inputs[0], 1));
    CV_Assert(int(numPriors * _numClasses) == total(inputs[1], 1));
    // Priors carry one extra channel of variances unless variance is encoded
    // in the targets themselves.
    CV_Assert(inputs[2][1] == 1 + (int)(!_varianceEncodedInTarget));
    // num() and channels() are 1.
    // Since the number of bboxes to be kept is unknown before nms, we manually
    // set it to maximal number of detections, [keep_top_k] parameter multiplied by batch size.
    // Each row is a 7 dimension std::vector, which stores
    // [image_id, label, confidence, xmin, ymin, xmax, ymax]
    outputs.resize(1, shape(1, 1, _keepTopK * num, 7));
    return false;
}
#ifdef HAVE_OPENCL
// Decode all bboxes in a batch
// OpenCL bbox decode for the whole batch: runs a decode kernel into a
// temporary fp32 UMat, then reads the result back into per-image LabelBBox
// maps. Returns false (caller falls back to CPU) for unknown code types or
// kernel failures.
bool ocl_DecodeBBoxesAll(UMat& loc_mat, UMat& prior_mat,
                         const int num, const int numPriors, const bool share_location,
                         const int num_loc_classes, const int background_label_id,
                         const cv::String& code_type, const bool variance_encoded_in_target,
                         const bool clip, std::vector<LabelBBox>& all_decode_bboxes)
{
    UMat outmat = UMat(loc_mat.dims, loc_mat.size, CV_32F);
    size_t nthreads = loc_mat.total();
    String kernel_name;
    if (code_type == "CORNER")
        kernel_name = "DecodeBBoxesCORNER";
    else if (code_type == "CENTER_SIZE")
        kernel_name = "DecodeBBoxesCENTER_SIZE";
    else
        return false;
    // NOTE(review): the kernel is launched once per batch item with identical
    // arguments even though nthreads already spans the whole tensor -- the
    // extra launches look redundant; confirm against the kernel source.
    for (int i = 0; i < num; ++i)
    {
        ocl::Kernel kernel(kernel_name.c_str(), ocl::dnn::detection_output_oclsrc);
        kernel.set(0, (int)nthreads);
        kernel.set(1, ocl::KernelArg::PtrReadOnly(loc_mat));
        kernel.set(2, ocl::KernelArg::PtrReadOnly(prior_mat));
        kernel.set(3, (int)variance_encoded_in_target);
        kernel.set(4, (int)numPriors);
        kernel.set(5, (int)share_location);
        kernel.set(6, (int)num_loc_classes);
        kernel.set(7, (int)background_label_id);
        kernel.set(8, (int)clip);
        kernel.set(9, (int)_locPredTransposed);
        kernel.set(10, ocl::KernelArg::PtrWriteOnly(outmat));
        if (!kernel.run(1, &nthreads, NULL, false))
            return false;
    }
    all_decode_bboxes.clear();
    all_decode_bboxes.resize(num);
    // Copy the decoded coordinates from the device buffer into host-side maps.
    {
        Mat mat = outmat.getMat(ACCESS_READ);
        const float* decode_data = mat.ptr<float>();
        for (int i = 0; i < num; ++i)
        {
            LabelBBox& decode_bboxes = all_decode_bboxes[i];
            for (int c = 0; c < num_loc_classes; ++c)
            {
                // -1 keys the single shared box set when share_location is on.
                int label = share_location ? -1 : c;
                decode_bboxes[label].resize(numPriors);
                for (int p = 0; p < numPriors; ++p)
                {
                    int startIdx = p * num_loc_classes * 4;
                    util::NormalizedBBox& bbox = decode_bboxes[label][p];
                    bbox.xmin = decode_data[startIdx + c * 4];
                    bbox.ymin = decode_data[startIdx + c * 4 + 1];
                    bbox.xmax = decode_data[startIdx + c * 4 + 2];
                    bbox.ymax = decode_data[startIdx + c * 4 + 3];
                }
            }
        }
    }
    return true;
}
// Reshapes the confidence input and transposes each image's slice from
// prediction-major to class-major, appending one (numClasses x
// numPredsPerClass) Mat per image to confPreds.
void ocl_GetConfidenceScores(const UMat& inp1, const int num,
                             const int numPredsPerClass, const int numClasses,
                             std::vector<Mat>& confPreds)
{
    int shape[] = { numClasses, numPredsPerClass };
    for (int i = 0; i < num; i++)
        confPreds.push_back(Mat(2, shape, CV_32F));
    // View the whole batch as (num*numPredsPerClass) x numClasses.
    shape[0] = num * numPredsPerClass;
    shape[1] = inp1.total() / shape[0];
    UMat umat = inp1.reshape(1, 2, &shape[0]);
    for (int i = 0; i < num; ++i)
    {
        // Rows belonging to image i, all class columns.
        Range ranges[] = { Range(i * numPredsPerClass, (i + 1) * numPredsPerClass), Range::all() };
        transpose(umat(ranges), confPreds[i]);
    }
}
// OpenCL forward path: decode boxes on the device, then run NMS and output
// assembly on the host. Returns false to request the CPU fallback.
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;
    outs.getUMatVector(outputs);
    bool use_half = (inps.depth() == CV_16S);
    if (use_half)
    {
        // Promote fp16 inputs to fp32 working copies.
        std::vector<UMat> orig_inputs;
        inps.getUMatVector(orig_inputs);
        inputs.resize(orig_inputs.size());
        for (size_t i = 0; i < orig_inputs.size(); i++)
            convertFp16(orig_inputs[i], inputs[i]);
    }
    else
    {
        inps.getUMatVector(inputs);
    }
    std::vector<LabelBBox> allDecodedBBoxes;
    std::vector<Mat> allConfidenceScores;
    int num = inputs[0].size[0];
    // extract predictions from input layers
    {
        int numPriors = inputs[2].size[2] / 4;
        // Retrieve all confidences
        ocl_GetConfidenceScores(inputs[1], num, numPriors, _numClasses, allConfidenceScores);
        // Decode all loc predictions to bboxes
        bool ret = ocl_DecodeBBoxesAll(inputs[0], inputs[2], num, numPriors,
                                       _shareLocation, _numLocClasses, _backgroundLabelId,
                                       _codeType, _varianceEncodedInTarget, _clip,
                                       allDecodedBBoxes);
        if (!ret)
            return false;
    }
    // Per-image NMS; numKept counts the surviving detections.
    size_t numKept = 0;
    std::vector<std::map<int, std::vector<int> > > allIndices;
    for (int i = 0; i < num; ++i)
    {
        numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices);
    }
    if (numKept == 0)
    {
        outputs[0].setTo(0);
        return true;
    }
    // fp16 output: stage results in a zeroed fp32 buffer, convert at the end.
    UMat umat = use_half ? UMat::zeros(4, outputs[0].size, CV_32F) : outputs[0];
    if (!use_half)
        umat.setTo(0);
    // If there are valid detections
    if (numKept > 0)
    {
        Mat mat = umat.getMat(ACCESS_WRITE);
        float* outputsData = mat.ptr<float>();
        size_t count = 0;
        for (int i = 0; i < num; ++i)
        {
            // Rows are packed back-to-back; each detection occupies 7 floats.
            count += outputDetections_(i, &outputsData[count * 7],
                                       allDecodedBBoxes[i], allConfidenceScores[i],
                                       allIndices[i], _groupByClasses);
        }
        CV_Assert(count == numKept);
    }
    if (use_half)
    {
        UMat half_umat;
        convertFp16(umat, half_umat);
        outs.assign(std::vector<UMat>(1, half_umat));
    }
    return true;
}
#endif
// CPU forward: decode location predictions against the priors, apply
// per-image NMS, and write [image_id, label, conf, xmin, ymin, xmax, ymax]
// rows into the single output blob.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());
    // The OpenCL path is only taken for normalized boxes.
    if (_bboxesNormalized)
    {
        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))
    }
    if (inputs_arr.depth() == CV_16S)
    {
        forward_fallback(inputs_arr, outputs_arr, internals_arr);
        return;
    }
    std::vector<Mat> inputs, outputs;
    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);
    std::vector<LabelBBox> allDecodedBBoxes;
    std::vector<Mat> allConfidenceScores;
    int num = inputs[0].size[0];
    // extract predictions from input layers
    {
        int numPriors = inputs[2].size[2] / 4;
        const float* locationData = inputs[0].ptr<float>();
        const float* confidenceData = inputs[1].ptr<float>();
        const float* priorData = inputs[2].ptr<float>();
        // Retrieve all location predictions
        std::vector<LabelBBox> allLocationPredictions;
        GetLocPredictions(locationData, num, numPriors, _numLocClasses,
                          _shareLocation, _locPredTransposed, allLocationPredictions);
        // Retrieve all confidences
        GetConfidenceScores(confidenceData, num, numPriors, _numClasses, allConfidenceScores);
        // Retrieve all prior bboxes
        std::vector<util::NormalizedBBox> priorBBoxes;
        std::vector<std::vector<float> > priorVariances;
        GetPriorBBoxes(priorData, numPriors, _bboxesNormalized, priorBBoxes, priorVariances);
        // Decode all loc predictions to bboxes
        util::NormalizedBBox clipBounds;
        if (_clip)
        {
            // Unnormalized boxes clip against the real image size, which must
            // arrive as a 4th input.
            CV_Assert(_bboxesNormalized || inputs.size() >= 4);
            clipBounds.xmin = clipBounds.ymin = 0.0f;
            if (_bboxesNormalized)
                clipBounds.xmax = clipBounds.ymax = 1.0f;
            else
            {
                // Input image sizes;
                CV_Assert(inputs[3].dims == 4);
                clipBounds.xmax = inputs[3].size[3] - 1;
                clipBounds.ymax = inputs[3].size[2] - 1;
            }
        }
        DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, num,
                        _shareLocation, _numLocClasses, _backgroundLabelId,
                        _codeType, _varianceEncodedInTarget, _clip, clipBounds,
                        _bboxesNormalized, allDecodedBBoxes);
    }
    size_t numKept = 0;
    std::vector<std::map<int, std::vector<int> > > allIndices;
    for (int i = 0; i < num; ++i)
    {
        numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices);
    }
    outputs[0].setTo(0);
    // If there is no detections
    if (numKept == 0)
        return;
    float* outputsData = outputs[0].ptr<float>();
    size_t count = 0;
    for (int i = 0; i < num; ++i)
    {
        count += outputDetections_(i, &outputsData[count * 7],
                                   allDecodedBBoxes[i], allConfidenceScores[i],
                                   allIndices[i], _groupByClasses);
    }
    CV_Assert(count == numKept);
    // Sync results back due changed output shape.
    outputs_arr.assign(outputs);
}
// Writes one image's kept detections as 7-float rows starting at
// `outputsData`, returning the number of rows written. The first pass builds
// a destination permutation (globally score-sorted unless groupByClasses),
// the second pass emits the rows through that permutation.
size_t outputDetections_(
    const int i, float* outputsData,
    const LabelBBox& decodeBBoxes, Mat& confidenceScores,
    const std::map<int, std::vector<int> >& indicesMap,
    bool groupByClasses
)
{
    std::vector<int> dstIndices;
    std::vector<std::pair<float, int> > allScores;
    for (std::map<int, std::vector<int> >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it)
    {
        int label = it->first;
        if (confidenceScores.rows <= label)
            CV_Error_(cv::Error::StsError, ("Could not find confidence predictions for label %d", label));
        // NOTE(review): this binds a Mat row through Mat's vector conversion,
        // copying the row into a temporary each iteration.
        const std::vector<float>& scores = confidenceScores.row(label);
        const std::vector<int>& indices = it->second;
        const int numAllScores = allScores.size();
        allScores.reserve(numAllScores + indices.size());
        for (size_t j = 0; j < indices.size(); ++j)
        {
            // The payload is the detection's running ordinal across labels.
            allScores.push_back(std::make_pair(scores[indices[j]], numAllScores + j));
        }
    }
    if (!groupByClasses)
        std::sort(allScores.begin(), allScores.end(), util::SortScorePairDescend<int>);
    // dstIndices[ordinal] = output row for that detection.
    dstIndices.resize(allScores.size());
    for (size_t j = 0; j < dstIndices.size(); ++j)
    {
        dstIndices[allScores[j].second] = j;
    }
    size_t count = 0;
    for (std::map<int, std::vector<int> >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it)
    {
        int label = it->first;
        if (confidenceScores.rows <= label)
            CV_Error_(cv::Error::StsError, ("Could not find confidence predictions for label %d", label));
        const std::vector<float>& scores = confidenceScores.row(label);
        // Shared locations are stored under the -1 key.
        int locLabel = _shareLocation ? -1 : label;
        LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(locLabel);
        if (label_bboxes == decodeBBoxes.end())
            CV_Error_(cv::Error::StsError, ("Could not find location predictions for label %d", locLabel));
        const std::vector<int>& indices = it->second;
        for (size_t j = 0; j < indices.size(); ++j, ++count)
        {
            int idx = indices[j];
            int dstIdx = dstIndices[count];
            const util::NormalizedBBox& decode_bbox = label_bboxes->second[idx];
            // Row layout: [image_id, label, confidence, xmin, ymin, xmax, ymax].
            outputsData[dstIdx * 7] = i;
            outputsData[dstIdx * 7 + 1] = label;
            outputsData[dstIdx * 7 + 2] = scores[idx];
            outputsData[dstIdx * 7 + 3] = decode_bbox.xmin;
            outputsData[dstIdx * 7 + 4] = decode_bbox.ymin;
            outputsData[dstIdx * 7 + 5] = decode_bbox.xmax;
            outputsData[dstIdx * 7 + 6] = decode_bbox.ymax;
        }
    }
    return count;
}
// Runs per-class NMS for one image and appends the surviving prior indices
// (keyed by class) to allIndices. When keep_top_k is set and exceeded, only
// the globally highest-scoring keep_top_k detections survive. Returns the
// number of detections kept for this image.
size_t processDetections_(
    const LabelBBox& decodeBBoxes, Mat& confidenceScores,
    std::vector<std::map<int, std::vector<int> > >& allIndices
)
{
    std::map<int, std::vector<int> > indices;
    size_t numDetections = 0;
    for (int c = 0; c < (int)_numClasses; ++c)
    {
        if (c == _backgroundLabelId)
            continue; // Ignore background class.
        if (c >= confidenceScores.rows)
            CV_Error_(cv::Error::StsError, ("Could not find confidence predictions for label %d", c));
        // Mat row converted into a value vector (copies the row).
        const std::vector<float> scores = confidenceScores.row(c);
        // Shared locations live under key -1; per-class under the class id.
        int label = _shareLocation ? -1 : c;
        LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(label);
        if (label_bboxes == decodeBBoxes.end())
            CV_Error_(cv::Error::StsError, ("Could not find location predictions for label %d", label));
        // Normalized and pixel-space boxes need different overlap formulas.
        if (_bboxesNormalized)
            NMSFast_(label_bboxes->second, scores, _confidenceThreshold, _nmsThreshold, 1.0, _topK,
                     indices[c], util::caffe_norm_box_overlap);
        else
            NMSFast_(label_bboxes->second, scores, _confidenceThreshold, _nmsThreshold, 1.0, _topK,
                     indices[c], util::caffe_box_overlap);
        numDetections += indices[c].size();
    }
    // Enforce the global per-image cap (_keepTopK == -1 disables it).
    if (_keepTopK > -1 && numDetections > (size_t)_keepTopK)
    {
        std::vector<std::pair<float, std::pair<int, int> > > scoreIndexPairs;
        for (std::map<int, std::vector<int> >::iterator it = indices.begin();
             it != indices.end(); ++it)
        {
            int label = it->first;
            const std::vector<int>& labelIndices = it->second;
            if (label >= confidenceScores.rows)
                CV_Error_(cv::Error::StsError, ("Could not find location predictions for label %d", label));
            const std::vector<float>& scores = confidenceScores.row(label);
            for (size_t j = 0; j < labelIndices.size(); ++j)
            {
                size_t idx = labelIndices[j];
                CV_Assert(idx < scores.size());
                scoreIndexPairs.push_back(std::make_pair(scores[idx], std::make_pair(label, idx)));
            }
        }
        // Keep outputs k results per image.
        std::sort(scoreIndexPairs.begin(), scoreIndexPairs.end(),
                  util::SortScorePairDescend<std::pair<int, int> >);
        scoreIndexPairs.resize(_keepTopK);
        // Regroup the survivors back into a label -> indices map.
        std::map<int, std::vector<int> > newIndices;
        for (size_t j = 0; j < scoreIndexPairs.size(); ++j)
        {
            int label = scoreIndexPairs[j].second.first;
            int idx = scoreIndexPairs[j].second.second;
            newIndices[label].push_back(idx);
        }
        allIndices.push_back(newIndices);
        return (size_t)_keepTopK;
    }
    else
    {
        allIndices.push_back(indices);
        return numDetections;
    }
}
// **************************************************************
// Utility functions
// **************************************************************
// Compute bbox size
// Area of `bbox`. Degenerate boxes (max < min on either axis) yield 0; a
// cached size is reused when present. Unnormalized boxes measure extent in
// whole pixels, hence the +1 on each side.
static float BBoxSize(const util::NormalizedBBox& bbox, bool normalized)
{
    if (bbox.xmax < bbox.xmin || bbox.ymax < bbox.ymin)
        return 0; // invalid box

    if (bbox.has_size())
        return bbox.size(); // reuse the cached area

    const float width = bbox.xmax - bbox.xmin;
    const float height = bbox.ymax - bbox.ymin;
    return normalized ? width * height
                      : (width + 1) * (height + 1);
}
// Decode a bbox according to a prior bbox
// Decode a bbox according to a prior bbox.
// When variance is not encoded in the target, each predicted coordinate is
// first scaled by the prior's variance. CORNER coding adds offsets to the
// prior's corners; CENTER_SIZE treats (xmin, ymin) as a center shift relative
// to the prior's size and (xmax, ymax) as log-scale size factors.
template<bool variance_encoded_in_target>
static void DecodeBBox(
    const util::NormalizedBBox& prior_bbox, const std::vector<float>& prior_variance,
    const cv::String& code_type,
    const bool clip_bbox, const util::NormalizedBBox& clip_bounds,
    const bool normalized_bbox, const util::NormalizedBBox& bbox,
    util::NormalizedBBox& decode_bbox)
{
    float bbox_xmin = variance_encoded_in_target ? bbox.xmin : prior_variance[0] * bbox.xmin;
    float bbox_ymin = variance_encoded_in_target ? bbox.ymin : prior_variance[1] * bbox.ymin;
    float bbox_xmax = variance_encoded_in_target ? bbox.xmax : prior_variance[2] * bbox.xmax;
    float bbox_ymax = variance_encoded_in_target ? bbox.ymax : prior_variance[3] * bbox.ymax;
    if (code_type == "CORNER")
    {
        // Offsets applied directly to the prior's corners.
        decode_bbox.xmin = prior_bbox.xmin + bbox_xmin;
        decode_bbox.ymin = prior_bbox.ymin + bbox_ymin;
        decode_bbox.xmax = prior_bbox.xmax + bbox_xmax;
        decode_bbox.ymax = prior_bbox.ymax + bbox_ymax;
    }
    else if (code_type == "CENTER_SIZE")
    {
        float prior_width = prior_bbox.xmax - prior_bbox.xmin;
        float prior_height = prior_bbox.ymax - prior_bbox.ymin;
        if (!normalized_bbox)
        {
            // Pixel-space boxes are inclusive on both ends.
            prior_width += 1.0f;
            prior_height += 1.0f;
        }
        float prior_center_x = prior_bbox.xmin + prior_width * .5;
        float prior_center_y = prior_bbox.ymin + prior_height * .5;
        float decode_bbox_center_x, decode_bbox_center_y;
        float decode_bbox_width, decode_bbox_height;
        // (xmin, ymin) shift the center; (xmax, ymax) scale the size in log space.
        decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x;
        decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y;
        decode_bbox_width = exp(bbox_xmax) * prior_width;
        decode_bbox_height = exp(bbox_ymax) * prior_height;
        decode_bbox.xmin = decode_bbox_center_x - decode_bbox_width * .5;
        decode_bbox.ymin = decode_bbox_center_y - decode_bbox_height * .5;
        decode_bbox.xmax = decode_bbox_center_x + decode_bbox_width * .5;
        decode_bbox.ymax = decode_bbox_center_y + decode_bbox_height * .5;
    }
    else
        CV_Error(Error::StsBadArg, "Unknown type.");
    if (clip_bbox)
    {
        // Clip the util::NormalizedBBox.
        decode_bbox.xmin = std::max(std::min(decode_bbox.xmin, clip_bounds.xmax), clip_bounds.xmin);
        decode_bbox.ymin = std::max(std::min(decode_bbox.ymin, clip_bounds.ymax), clip_bounds.ymin);
        decode_bbox.xmax = std::max(std::min(decode_bbox.xmax, clip_bounds.xmax), clip_bounds.xmin);
        decode_bbox.ymax = std::max(std::min(decode_bbox.ymax, clip_bounds.ymax), clip_bounds.ymin);
    }
    // Refresh the cached area for the decoded coordinates.
    decode_bbox.clear_size();
    decode_bbox.set_size(BBoxSize(decode_bbox, normalized_bbox));
}
// Decode a set of bboxes according to a set of prior bboxes
// Decode a set of bboxes against their priors. prior_bboxes, prior_variances
// and bboxes are parallel arrays of equal length; results land in
// decode_bboxes (resized here).
// Fix: loop indices are size_t, matching num_bboxes, instead of the previous
// int counters (signed/unsigned comparison, truncation on huge inputs).
static void DecodeBBoxes(
    const std::vector<util::NormalizedBBox>& prior_bboxes,
    const std::vector<std::vector<float> >& prior_variances,
    const cv::String& code_type, const bool variance_encoded_in_target,
    const bool clip_bbox, const util::NormalizedBBox& clip_bounds,
    const bool normalized_bbox, const std::vector<util::NormalizedBBox>& bboxes,
    std::vector<util::NormalizedBBox>& decode_bboxes)
{
    CV_Assert(prior_bboxes.size() == prior_variances.size());
    CV_Assert(prior_bboxes.size() == bboxes.size());
    size_t num_bboxes = prior_bboxes.size();
    CV_Assert(num_bboxes == 0 || prior_variances[0].size() == 4);
    decode_bboxes.clear(); decode_bboxes.resize(num_bboxes);
    // The variance flag is a template argument of DecodeBBox, so branch once
    // outside the loop rather than per element.
    if (variance_encoded_in_target)
    {
        for (size_t i = 0; i < num_bboxes; ++i)
            DecodeBBox<true>(prior_bboxes[i], prior_variances[i], code_type,
                             clip_bbox, clip_bounds, normalized_bbox,
                             bboxes[i], decode_bboxes[i]);
    }
    else
    {
        for (size_t i = 0; i < num_bboxes; ++i)
            DecodeBBox<false>(prior_bboxes[i], prior_variances[i], code_type,
                              clip_bbox, clip_bounds, normalized_bbox,
                              bboxes[i], decode_bboxes[i]);
    }
}
// Decode all bboxes in a batch
// Decode all bboxes in a batch: one LabelBBox per image, one entry per
// location class (keyed -1 when locations are shared), skipping the
// background class.
static void DecodeBBoxesAll(const std::vector<LabelBBox>& all_loc_preds,
                            const std::vector<util::NormalizedBBox>& prior_bboxes,
                            const std::vector<std::vector<float> >& prior_variances,
                            const int num, const bool share_location,
                            const int num_loc_classes, const int background_label_id,
                            const cv::String& code_type, const bool variance_encoded_in_target,
                            const bool clip, const util::NormalizedBBox& clip_bounds,
                            const bool normalized_bbox, std::vector<LabelBBox>& all_decode_bboxes)
{
    CV_Assert(all_loc_preds.size() == num);
    all_decode_bboxes.clear();
    all_decode_bboxes.resize(num);
    for (int i = 0; i < num; ++i)
    {
        // Decode predictions into bboxes.
        const LabelBBox& loc_preds = all_loc_preds[i];
        LabelBBox& decode_bboxes = all_decode_bboxes[i];
        for (int c = 0; c < num_loc_classes; ++c)
        {
            // Shared locations are stored once under key -1.
            int label = share_location ? -1 : c;
            if (label == background_label_id)
                continue; // Ignore background class.
            LabelBBox::const_iterator label_loc_preds = loc_preds.find(label);
            if (label_loc_preds == loc_preds.end())
                CV_Error_(cv::Error::StsError, ("Could not find location predictions for label %d", label));
            DecodeBBoxes(prior_bboxes, prior_variances,
                         code_type, variance_encoded_in_target, clip, clip_bounds,
                         normalized_bbox, label_loc_preds->second, decode_bboxes[label]);
        }
    }
}
// Get prior bounding boxes from prior_data
// prior_data: 1 x 2 x num_priors * 4 x 1 blob.
// num_priors: number of priors.
// prior_bboxes: stores all the prior bboxes in the format of util::NormalizedBBox.
// prior_variances: stores all the variances needed by prior bboxes.
// Unpack the prior blob (1 x 2 x num_priors*4 x 1): the first channel holds
// the prior corners, the second the 4 encoding variances of each prior.
static void GetPriorBBoxes(const float* priorData, const int& numPriors,
                           bool normalized_bbox, std::vector<util::NormalizedBBox>& priorBBoxes,
                           std::vector<std::vector<float> >& priorVariances)
{
    priorBBoxes.clear(); priorBBoxes.resize(numPriors);
    priorVariances.clear(); priorVariances.resize(numPriors);
    for (int i = 0; i < numPriors; ++i)
    {
        // Corners of prior i live in the first channel.
        const float* corners = priorData + i * 4;
        util::NormalizedBBox& bbox = priorBBoxes[i];
        bbox.xmin = corners[0];
        bbox.ymin = corners[1];
        bbox.xmax = corners[2];
        bbox.ymax = corners[3];
        bbox.set_size(BBoxSize(bbox, normalized_bbox));

        // Variances of prior i follow all the corners (second channel).
        const float* vars = priorData + (numPriors + i) * 4;
        priorVariances[i].assign(vars, vars + 4);
    }
}
// Get location predictions from loc_data.
// loc_data: num x num_preds_per_class * num_loc_classes * 4 blob.
// num: the number of images.
// num_preds_per_class: number of predictions per class.
// num_loc_classes: number of location classes. It is 1 if share_location is
// true; and is equal to number of classes needed to predict otherwise.
// share_location: if true, all classes share the same location prediction.
// loc_pred_transposed: if true, represent four bounding box values as
// [y,x,height,width] or [x,y,width,height] otherwise.
// loc_preds: stores the location prediction, where each item contains
// location prediction for an image.
// Unpack per-image location predictions from the flat locData buffer into
// LabelBBox maps (key -1 when locations are shared). Layout per image:
// [prediction p][location class c][4 coords], with the coordinate order
// swapped to YXHW when locPredTransposed is set.
static void GetLocPredictions(const float* locData, const int num,
                              const int numPredsPerClass, const int numLocClasses,
                              const bool shareLocation, const bool locPredTransposed,
                              std::vector<LabelBBox>& locPreds)
{
    locPreds.clear();
    if (shareLocation)
    {
        CV_Assert(numLocClasses == 1);
    }
    locPreds.resize(num);
    // Advance locData by one image's worth of predictions per iteration.
    for (int i = 0; i < num; ++i, locData += numPredsPerClass * numLocClasses * 4)
    {
        LabelBBox& labelBBox = locPreds[i];
        for (int p = 0; p < numPredsPerClass; ++p)
        {
            int startIdx = p * numLocClasses * 4;
            for (int c = 0; c < numLocClasses; ++c)
            {
                int label = shareLocation ? -1 : c;
                if (labelBBox.find(label) == labelBBox.end())
                {
                    labelBBox[label].resize(numPredsPerClass);
                }
                util::NormalizedBBox& bbox = labelBBox[label][p];
                if (locPredTransposed)
                {
                    // Stored as [ymin, xmin, ymax, xmax].
                    bbox.ymin = locData[startIdx + c * 4];
                    bbox.xmin = locData[startIdx + c * 4 + 1];
                    bbox.ymax = locData[startIdx + c * 4 + 2];
                    bbox.xmax = locData[startIdx + c * 4 + 3];
                }
                else
                {
                    // Stored as [xmin, ymin, xmax, ymax].
                    bbox.xmin = locData[startIdx + c * 4];
                    bbox.ymin = locData[startIdx + c * 4 + 1];
                    bbox.xmax = locData[startIdx + c * 4 + 2];
                    bbox.ymax = locData[startIdx + c * 4 + 3];
                }
            }
        }
    }
}
// Get confidence predictions from conf_data.
// conf_data: num x num_preds_per_class * num_classes blob.
// num: the number of images.
// num_preds_per_class: number of predictions per class.
// num_classes: number of classes.
// conf_preds: stores the confidence prediction, where each item contains
// confidence prediction for an image.
static void GetConfidenceScores(const float* confData, const int num,
const int numPredsPerClass, const int numClasses,
std::vector<Mat>& confPreds)
{
int shape[] = { numClasses, numPredsPerClass };
for (int i = 0; i < num; i++)
confPreds.push_back(Mat(2, shape, CV_32F));
for (int i = 0; i < num; ++i, confData += numPredsPerClass * numClasses)
{
Mat labelScores = confPreds[i];
for (int c = 0; c < numClasses; ++c)
{
for (int p = 0; p < numPredsPerClass; ++p)
{
labelScores.at<float>(c, p) = confData[p * numClasses + c];
}
}
}
}
// Compute the jaccard (intersection over union IoU) overlap between two bboxes.
template<bool normalized>
static float JaccardOverlap(const util::NormalizedBBox& bbox1,
const util::NormalizedBBox& bbox2)
{
util::NormalizedBBox intersect_bbox;
intersect_bbox.xmin = std::max(bbox1.xmin, bbox2.xmin);
intersect_bbox.ymin = std::max(bbox1.ymin, bbox2.ymin);
intersect_bbox.xmax = std::min(bbox1.xmax, bbox2.xmax);
intersect_bbox.ymax = std::min(bbox1.ymax, bbox2.ymax);
float intersect_size = BBoxSize(intersect_bbox, normalized);
if (intersect_size > 0)
{
float bbox1_size = BBoxSize(bbox1, normalized);
float bbox2_size = BBoxSize(bbox2, normalized);
return intersect_size / (bbox1_size + bbox2_size - intersect_size);
}
else
{
return 0.;
}
}
#ifdef HAVE_INF_ENGINE
// Build an Inference Engine (NN Builder API) DetectionOutput layer mirroring
// this layer's parameters. Input wrappers are unused: IE connects the three
// ports (box logits, class scores, priors) itself.
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
    InferenceEngine::Builder::DetectionOutputLayer ieLayer(name);
    ieLayer.setNumClasses(_numClasses);
    ieLayer.setShareLocation(_shareLocation);
    ieLayer.setBackgroudLabelId(_backgroundLabelId);  // (sic) misspelling is the actual IE API name
    ieLayer.setNMSThreshold(_nmsThreshold);
    // IE requires a positive top_k; fall back to keep_top_k when top_k is unset.
    ieLayer.setTopK(_topK > 0 ? _topK : _keepTopK);
    ieLayer.setKeepTopK(_keepTopK);
    ieLayer.setConfidenceThreshold(_confidenceThreshold);
    ieLayer.setVariantEncodedInTarget(_varianceEncodedInTarget);  // (sic) IE API name
    ieLayer.setCodeType("caffe.PriorBoxParameter." + _codeType);
    ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(3));
    // Parameters without dedicated setters go through the generic map.
    InferenceEngine::Builder::Layer l = ieLayer;
    l.getParameters()["eta"] = std::string("1.0");
    l.getParameters()["clip"] = _clip;
    return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
#endif  // HAVE_INF_ENGINE
#ifdef HAVE_DNN_NGRAPH
// Build an nGraph DetectionOutput node from the three input nodes
// (box logits, class predictions, prior boxes) and this layer's parameters.
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
    CV_Assert(nodes.size() == 3);  // DetectionOutput always takes exactly 3 inputs
    auto& box_logits  = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
    auto& class_preds = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
    auto& proposals   = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
    ngraph::op::DetectionOutputAttrs attrs;
    attrs.num_classes = _numClasses;
    attrs.background_label_id = _backgroundLabelId;
    // nGraph requires a positive top_k; fall back to keep_top_k when unset.
    attrs.top_k = _topK > 0 ? _topK : _keepTopK;
    attrs.variance_encoded_in_target = _varianceEncodedInTarget;
    attrs.keep_top_k = {_keepTopK};
    attrs.nms_threshold = _nmsThreshold;
    attrs.confidence_threshold = _confidenceThreshold;
    attrs.share_location = _shareLocation;
    attrs.clip_before_nms = _clip;
    attrs.code_type = std::string{"caffe.PriorBoxParameter." + _codeType};
    attrs.normalized = true;  // priors are expected in normalized coordinates
    auto det_out = std::make_shared<ngraph::op::DetectionOutput>(box_logits, class_preds,
                                                                 proposals, attrs);
    return Ptr<BackendNode>(new InfEngineNgraphNode(det_out));
}
#endif  // HAVE_DNN_NGRAPH
};
// IoU for boxes in absolute (pixel) coordinates — Caffe convention
// (non-normalized size computation). Thin wrapper used by the NMS utilities.
float util::caffe_box_overlap(const util::NormalizedBBox& a, const util::NormalizedBBox& b)
{
    return DetectionOutputLayerImpl::JaccardOverlap<false>(a, b);
}
// IoU for boxes in normalized [0, 1] coordinates. Thin wrapper used by the
// NMS utilities.
float util::caffe_norm_box_overlap(const util::NormalizedBBox& a, const util::NormalizedBBox& b)
{
    return DetectionOutputLayerImpl::JaccardOverlap<true>(a, b);
}
// Out-of-class definition of the static layer-type name used in diagnostics.
const std::string DetectionOutputLayerImpl::_layerName = std::string("DetectionOutput");
// Factory: construct a DetectionOutput layer from serialized parameters.
Ptr<DetectionOutputLayer> DetectionOutputLayer::create(const LayerParams &params)
{
    return makePtr<DetectionOutputLayerImpl>(params);
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,780 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/eltwise.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Element-wise layer: combines two or more input blobs into a single output
// using SUM (optionally with per-input coefficients), PROD, MAX or DIV.
// Inputs normally share the same shape; several "channels modes" relax the
// channel-count requirement (see OutputChannelsMode).
class EltwiseLayerImpl CV_FINAL : public EltwiseLayer
{
public:
    // Supported element-wise operations.
    enum EltwiseOp
    {
        PROD = 0,
        SUM = 1,
        MAX = 2,
        DIV = 3
    } op;
    // Per-input scale factors for SUM; empty means all coefficients are 1.
    std::vector<float> coeffs;

    // How the output channel count relates to the inputs' channel counts.
    enum OutputChannelsMode
    {
        ELTWISE_CHANNNELS_SAME = 0, //!< number of channels from inputs must be the same and equal to output's number of channels
        ELTWISE_CHANNNELS_INPUT_0,  //!< number of channels from inputs may be different,
                                    //!< output's number of channels is equal to number of channels of first input
                                    //!< number of channels of other inputs should not be greater than number of channels of first input
        ELTWISE_CHANNNELS_INPUT_0_TRUNCATE, //!< number of channels from inputs may be different,
                                    //!< output's number of channels is equal to number of channels of first input
                                    //!< there is restriction on number of channels of other inputs
                                    //!< extra channels of other inputs is ignored
        ELTWISE_CHANNNELS_USE_MAX,  //!< number of channels from inputs may be different,
                                    //!< output's number of channels is equal to maximal number of input channels
                                    //!< @note supported operation: `SUM`
    } channelsModeInput;

    mutable OutputChannelsMode channelsMode;  //!< "optimized" channels mode (switch to ELTWISE_CHANNNELS_SAME if number of input channels are equal)
    mutable /*size_t*/int outputChannels;     // resolved in getMemoryShapes()

    // Parse operation type, optional coefficients and channels mode from
    // the serialized layer parameters.
    EltwiseLayerImpl(const LayerParams& params)
        : outputChannels(0)
    {
        setParamsFrom(params);
        op = SUM;  // default operation
        if (params.has("operation"))
        {
            String operation = toLowerCase(params.get<String>("operation"));
            if (operation == "prod")
                op = PROD;
            else if (operation == "sum")
                op = SUM;
            else if (operation == "max")
                op = MAX;
            else if (operation == "div")
                op = DIV;
            else
                CV_Error(cv::Error::StsBadArg, "Unknown operation type \"" + operation + "\"");
        }
        if (params.has("coeff"))
        {
            DictValue paramCoeff = params.get("coeff");
            int i, n = paramCoeff.size();
            coeffs.resize(n);
            for (i = 0; i < n; i++)
            {
                coeffs[i] = paramCoeff.get<float>(i);
            }
        }
        channelsModeInput = ELTWISE_CHANNNELS_SAME;
        if (params.has("output_channels_mode"))
        {
            String v = toLowerCase(params.get<String>("output_channels_mode"));
            if (v == "same")
            {
                channelsModeInput = ELTWISE_CHANNNELS_SAME;
            }
            else if (v == "input_0")
            {
                channelsModeInput = ELTWISE_CHANNNELS_INPUT_0;
            }
            else if (v == "input_0_truncate")
            {
                channelsModeInput = ELTWISE_CHANNNELS_INPUT_0_TRUNCATE;
            }
            else if (v == "max_input_channels")
            {
                channelsModeInput = ELTWISE_CHANNNELS_USE_MAX;
                if (op != SUM)
                    CV_Error(cv::Error::StsBadArg, "[" + type + "]:(" + name + ") 'max' channels mode is limited to SUM operation only");
            }
            else
                CV_Error(cv::Error::StsBadArg, "[" + type + "]:(" + name + ") unknown channels mode: \"" + v + "\"");
        }
        channelsMode = channelsModeInput;

        // TODO Must have checks for other unknown options
    }

    // Backend availability: CUDA and OpenCV always; Halide except DIV;
    // IE/nGraph only when all inputs share the channel count.
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               (backendId == DNN_BACKEND_HALIDE && op != DIV) ||  // TODO: not implemented, see PR #15811
               ((((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (preferableTarget != DNN_TARGET_OPENCL || coeffs.empty()))
                  || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && channelsMode == ELTWISE_CHANNNELS_SAME));
    }

    // Validate input shapes against the configured channels mode and derive
    // the single output shape. Also collapses channelsMode to
    // ELTWISE_CHANNNELS_SAME when all inputs turn out to have equal channels.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() >= 2);
        CV_Assert(inputs[0].size() >= 2);
        CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
        CV_Assert(op == SUM || coeffs.size() == 0);  // coefficients only make sense for SUM
        int dims = inputs[0].size();
        // Number of channels in output shape is determined by the first input tensor.
        bool variableChannels = false;
        int numChannels = inputs[0][1];
        for (size_t i = 1; i < inputs.size(); i++)
        {
            CV_Assert(inputs[0][0] == inputs[i][0]);  // batch sizes are equal
            int input_channels = inputs[i][1];
            if (numChannels != input_channels)
                variableChannels = true;
            if (channelsModeInput == ELTWISE_CHANNNELS_SAME)
            {
                CV_Assert(numChannels == input_channels);
            }
            else if (channelsModeInput == ELTWISE_CHANNNELS_INPUT_0)
            {
                CV_Assert(numChannels >= input_channels);
            }
            else if (channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE)
            {
                // nothing to check
            }
            else if (channelsModeInput == ELTWISE_CHANNNELS_USE_MAX)
            {
                numChannels = std::max(numChannels, input_channels);
            }
            else
            {
                CV_Assert(0 && "Internal error");
            }
            // All non-channel dimensions must match exactly.
            for (size_t j = 2; j < dims; j++)
                CV_Assert(inputs[0][j] == inputs[i][j]);
        }
        channelsMode = variableChannels ? channelsModeInput : ELTWISE_CHANNNELS_SAME;
        outputChannels = numChannels;
        outputs.assign(1, inputs[0]);
        outputs[0][1] = numChannels;
        return false;
    }

    // Parallel CPU implementation. Work is split into `nstripes` stripes over
    // (batch x plane) elements; channels are iterated inside each stripe so
    // that per-input channel broadcasting can be handled.
    class EltwiseInvoker : public ParallelLoopBody
    {
        EltwiseLayerImpl& self;
        std::vector<const Mat*> srcs;       // inputs, possibly reordered (see run)
        std::vector<int> srcNumChannels;    // channel count of each (reordered) input
        int nsrcs;
        Mat* dst;
        std::vector<float> coeffs;          // reordered alongside srcs
        int nstripes;
        const ActivationLayer* activ;       // optional fused activation
        int channels;
        size_t planeSize;                   // H*W (product of dims >= 2)

        EltwiseInvoker(EltwiseLayerImpl& self_)
            : self(self_)
            , nsrcs(0), dst(0), nstripes(0), activ(0), channels(0)
            , planeSize(0)
        {}

    public:
        // Validate inputs, set up the invoker and run it in parallel.
        static void run(EltwiseLayerImpl& self,
                        const Mat* srcs, int nsrcs, Mat& dst,
                        int nstripes)
        {
            const EltwiseOp op = self.op;
            CV_Check(dst.dims, 1 < dst.dims && dst.dims <= 5, "");
            CV_CheckTypeEQ(dst.type(), CV_32FC1, "");
            CV_Assert(dst.isContinuous());
            CV_Assert(self.coeffs.empty() || self.coeffs.size() == (size_t)nsrcs);
            CV_CheckGE(nsrcs, 2, "");
            CV_Assert(self.outputChannels == dst.size[1]);

            EltwiseInvoker p(self);
            p.srcs.resize(nsrcs);
            p.srcNumChannels.resize(nsrcs);
            p.coeffs = self.coeffs;  // can be sorted

            bool sortInputs = false;
            for( int i = 0; i < nsrcs; i++ )
            {
                p.srcs[i] = &srcs[i];
                CV_CheckEQ(srcs[i].dims, dst.dims, "");
                CV_Assert(srcs[i].isContinuous());
                CV_Assert(srcs[i].type() == dst.type());
                p.srcNumChannels[i] = (srcs[i].dims >= 4) ? srcs[i].size[1] : 1;

                if (self.channelsMode == ELTWISE_CHANNNELS_SAME)
                {
                    CV_Assert(srcs[i].size == dst.size);
                }
                else if (self.channelsMode == ELTWISE_CHANNNELS_INPUT_0)
                {
                    if (i == 0)
                        CV_Assert(srcs[0].size == dst.size);
                    CV_Assert(self.outputChannels >= p.srcNumChannels[i]);
                    sortInputs = true;
                }
                else if (self.channelsMode == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE)
                {
                    if (i == 0)
                        CV_Assert(srcs[0].size == dst.size);
                    sortInputs = true;
                }
                else if (self.channelsMode == ELTWISE_CHANNNELS_USE_MAX)
                {
                    CV_Assert(op == SUM);
                    CV_Assert(self.outputChannels >= p.srcNumChannels[i]);
                    sortInputs = true;
                }
                else
                {
                    CV_Assert(0 && "Internal error");
                }

                if (sortInputs)
                {
                    // Sort srcs and coefficients in the desc order by number of channels
                    // (insertion sort step), so the kernel can process the widest
                    // inputs first and skip narrower ones per channel.
                    for (int j = i; j >= 1; j--)
                    {
                        if (std::min(self.outputChannels, p.srcs[j - 1]->size[1]) < std::min(self.outputChannels, p.srcs[j]->size[1]))
                        {
                            std::swap(p.srcs[j - 1], p.srcs[j]);
                            std::swap(p.srcNumChannels[j - 1], p.srcNumChannels[j]);
                            if (!p.coeffs.empty())
                                std::swap(p.coeffs[j - 1], p.coeffs[j]);
                        }
                        else
                            break;
                    }
                }
            }

            p.nsrcs = nsrcs;
            p.dst = &dst;
            p.nstripes = nstripes;
            p.channels = (dst.dims >= 4 ? dst.size[1] : 1);

            p.planeSize = dst.total(dst.dims >= 4 ? 2 : 1);
            CV_CheckEQ(dst.total(), dst.size[0] * p.channels * p.planeSize, "");

            // Drop the coefficient array entirely when all coefficients are 1.
            bool simpleCoeffs = true;
            if (op == SUM && !p.coeffs.empty())
            {
                CV_CheckEQ(p.coeffs.size(), (size_t)nsrcs, "");
                for (size_t i = 0; i < p.coeffs.size(); i++)
                {
                    if (p.coeffs[i] != 1)
                    {
                        simpleCoeffs = false;
                        break;
                    }
                }
            }
            if (simpleCoeffs)
                p.coeffs.clear();
            p.activ = self.activ.get();

            parallel_for_(Range(0, nstripes), p, nstripes);
        }

        // Process one range of stripes. Each stripe covers a contiguous chunk
        // of (batch x plane) positions; all channels are computed per chunk.
        void operator()(const Range& r) const CV_OVERRIDE
        {
            const EltwiseOp op = self.op;
            size_t total = dst->size[0]*planeSize;
            size_t stripeSize = (total + nstripes - 1)/nstripes;
            size_t stripeStart = r.start*stripeSize;
            size_t stripeEnd = std::min(r.end*stripeSize, total);
            const float* coeffsptr = !coeffs.empty() ? &coeffs[0] : 0;
            float* dstptr0 = dst->ptr<float>();
            int blockSize0 = 1 << 12;  // process in blocks to keep data in cache

            for (size_t ofs = stripeStart; ofs < stripeEnd; )
            {
                int sampleIdx = (int)(ofs / planeSize);
                int delta = (int)ofs - sampleIdx * planeSize;  // offset within the plane
                int blockSize = std::min(blockSize0, std::min((int)(stripeEnd - ofs), (int)planeSize - delta));
                if( blockSize <= 0 )
                    break;
                ofs += blockSize;

                for (int c = 0; c < channels; c++)
                {
                    size_t dstIdx = delta + (sampleIdx*channels + c)*planeSize;
                    float* dstptr = dstptr0 + dstIdx;

                    // process first two inputs
                    {
                        const float* srcptr0 = srcs[0]->ptr<float>() + dstIdx;

                        const int inputIdx = 1;
                        int src1_channels = srcNumChannels[inputIdx];
                        if (c >= src1_channels)
                        {
                            // no data from second input: copy (or scale) the first
                            if (!coeffsptr || coeffsptr[0] == 1.0f)
                            {
                                for (int j = 0; j < blockSize; j++)
                                {
                                    dstptr[j] = srcptr0[j];
                                }
                            }
                            else
                            {
                                float c0 = coeffsptr[0];
                                for (int j = 0; j < blockSize; j++)
                                {
                                    dstptr[j] = c0*srcptr0[j];
                                }
                            }
                        }
                        else
                        {
                            size_t srcIdx = delta + (sampleIdx * src1_channels + c) * planeSize;
                            const float* srcptrI = srcs[inputIdx]->ptr<float>() + srcIdx;

                            if (op == PROD)
                            {
                                for (int j = 0; j < blockSize; j++)
                                {
                                    dstptr[j] = srcptr0[j] * srcptrI[j];
                                }
                            }
                            else if (op == DIV)
                            {
                                for (int j = 0; j < blockSize; j++)
                                {
                                    dstptr[j] = srcptr0[j] / srcptrI[j];
                                }
                            }
                            else if (op == MAX)
                            {
                                for (int j = 0; j < blockSize; j++)
                                {
                                    dstptr[j] = std::max(srcptr0[j], srcptrI[j]);
                                }
                            }
                            else if (op == SUM)
                            {
                                if (!coeffsptr || (coeffsptr[0] == 1.0f && coeffsptr[1] == 1.0f))
                                {
                                    for (int j = 0; j < blockSize; j++)
                                    {
                                        dstptr[j] = srcptr0[j] + srcptrI[j];
                                    }
                                }
                                else
                                {
                                    float c0 = coeffsptr[0];
                                    float c1 = coeffsptr[1];
                                    for (int j = 0; j < blockSize; j++)
                                    {
                                        dstptr[j] = c0*srcptr0[j] + c1*srcptrI[j];
                                    }
                                }
                            }
                            else
                                CV_Error(Error::StsInternal, "");
                        }
                    }

                    // aggregate other inputs (3+) into dst in place
                    for (size_t inputIdx = 2; inputIdx < nsrcs; inputIdx++)
                    {
                        int srcI_channels = srcNumChannels[inputIdx];
                        if (c >= srcI_channels)
                            continue;  // no data from second input
                        size_t srcIdx = delta + (sampleIdx * srcI_channels + c) * planeSize;
                        const float* srcptrI = srcs[inputIdx]->ptr<float>() + srcIdx;

                        if (op == PROD)
                        {
                            for (int j = 0; j < blockSize; j++)
                            {
                                dstptr[j] *= srcptrI[j];
                            }
                        }
                        else if (op == DIV)
                        {
                            for (int j = 0; j < blockSize; j++)
                            {
                                dstptr[j] /= srcptrI[j];
                            }
                        }
                        else if (op == MAX)
                        {
                            for (int j = 0; j < blockSize; j++)
                            {
                                dstptr[j] = std::max(dstptr[j], srcptrI[j]);
                            }
                        }
                        else if (op == SUM)
                        {
                            if (!coeffsptr || coeffsptr[inputIdx] == 1.0f)
                            {
                                for (int j = 0; j < blockSize; j++)
                                {
                                    dstptr[j] += srcptrI[j];
                                }
                            }
                            else
                            {
                                float cI = coeffsptr[inputIdx];
                                for (int j = 0; j < blockSize; j++)
                                {
                                    dstptr[j] += cI * srcptrI[j];
                                }
                            }
                        }
                        else
                            CV_Error(Error::StsInternal, "");
                    }
                }

                // Apply the fused activation across all channels of this block.
                if( activ )
                {
                    float* ptr = dstptr0 + delta + sampleIdx*channels*planeSize;
                    activ->forwardSlice(ptr, ptr, blockSize, planeSize, 0, channels);
                }
            }
        }
    };

#ifdef HAVE_OPENCL
    // OpenCL path. Only ELTWISE_CHANNNELS_SAME is supported; FP16 is only
    // supported for SUM. Returns false to fall back to the CPU path.
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        if ((inputs_.depth() == CV_16S && op != SUM) || (channelsMode != ELTWISE_CHANNNELS_SAME))
            return false;

        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);

        switch (op)
        {
            case SUM:
                {
                    int channels = total(shape(outputs[0]), 0, 2);
                    int plane_size = total(shape(outputs[0]), 2);
                    if (channels % 4 == 0 && plane_size % 4 == 0)
                    {
                        // Vectorized kernel path; each pass folds one more input
                        // into the output (-DLOOP=i selects accumulate behavior).
                        size_t localsize[] = { 128 };
                        size_t globalsize[] = { (size_t)channels / 4 * localsize[0] };
                        String opts;
                        if (inputs_.depth() == CV_16S)
                            opts = " -DDtype=half -DDtype4=half4 -DDtype8=half8";
                        else
                            opts = " -DDtype=float -DDtype4=float4 -DDtype8=float8";

                        for (int i = 0; i < (inputs.size() - 1); ++i)
                        {
                            String buildopt = format("-DLOOP=%d", i) + opts;
                            ocl::Kernel kernel("op_sum4", ocl::dnn::eltwise_oclsrc, buildopt);
                            int idx = 0;
                            // NOTE(review): `inpMat` is computed but never used, and the
                            // kernel operands below are always inputs[0]/inputs[1]
                            // regardless of `i` — for 3+ inputs this looks suspicious;
                            // verify against the op_sum4 kernel in eltwise.cl.
                            UMat inpMat = (i == 0) ? inputs[0] : UMat();
                            float coeff1 = (coeffs.empty() || i > 0) ? 1.0f : coeffs[i];
                            float coeff2 = coeffs.empty() ? 1.0f : coeffs[i + 1];
                            kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inputs[0]));
                            kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inputs[1]));
                            kernel.set(idx++, (int)plane_size);
                            kernel.set(idx++, (float)coeff1);
                            kernel.set(idx++, (float)coeff2);
                            kernel.set(idx++, ocl::KernelArg::PtrReadWrite(outputs[0]));
                            bool ret = kernel.run(1, globalsize, localsize, false);
                            if (!ret)
                                return false;
                        }
                    }
                    else
                    {
                        // Generic fallback via cv::multiply/cv::add (FP32 only).
                        if (inputs_.depth() == CV_16S)
                            return false;

                        float coeff1 = coeffs.empty() ? 1.f : coeffs[0];
                        float coeff2 = coeffs.empty() ? 1.f : coeffs[1];
                        UMat mul0, mul1;
                        multiply(coeff1, inputs[0], mul0);
                        multiply(coeff2, inputs[1], mul1);
                        add(mul0, mul1, outputs[0]);
                        for (int i = 2; i < inputs.size(); ++i)
                        {
                            float coeff = coeffs.empty() ? 1.f : coeffs[i];
                            multiply(coeff, inputs[i], mul0);
                            add(mul0, outputs[0], outputs[0]);
                        }
                    }
                }
                break;
            case PROD:
                multiply(inputs[0], inputs[1], outputs[0]);
                for (int i = 2; i < inputs.size(); ++i)
                    multiply(inputs[i], outputs[0], outputs[0]);
                break;
            case DIV:
                divide(inputs[0], inputs[1], outputs[0]);
                for (int i = 2; i < inputs.size(); ++i)
                    divide(outputs[0], inputs[i], outputs[0]);
                break;
            case MAX:
                max(inputs[0], inputs[1], outputs[0]);
                for (int i = 2; i < inputs.size(); ++i)
                    max(inputs[i], outputs[0], outputs[0]);
                break;
            default:
                return false;
        }
        return true;
    }
#endif

    // CPU (and OpenCL dispatch) forward pass. FP16 inputs are handled by the
    // generic fallback which converts to FP32.
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        CV_Assert(outputs.size() == 1);
        const int nstripes = getNumThreads();
        EltwiseInvoker::run(*this,
                            &inputs[0], (int)inputs.size(), outputs[0],
                            nstripes);
    }

#ifdef HAVE_CUDA
    // Map this layer to the cuda4dnn eltwise primitive.
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        auto op_ = [this] {
            switch (op) {
            case MAX: return cuda4dnn::EltwiseOpType::MAX;
            case SUM: return cuda4dnn::EltwiseOpType::SUM;
            case PROD: return cuda4dnn::EltwiseOpType::PRODUCT;
            case DIV: return cuda4dnn::EltwiseOpType::DIV;
            }
            return cuda4dnn::EltwiseOpType::SUM;  // unreachable; silences warnings
        }();

        return make_cuda_node<cuda4dnn::EltwiseOp>(preferableTarget, std::move(context->stream), op_, coeffs);
    }
#endif

    // Build a Halide expression combining all input buffers element-wise.
    // DIV is excluded by supportBackend().
    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        Halide::Expr topExpr;
        std::vector<Halide::Buffer<> > inputBuffers = halideBuffers(input);
        switch (op)
        {
            case SUM:
                if (coeffs.empty())
                {
                    topExpr = inputBuffers[0](x, y, c, n) +
                              inputBuffers[1](x, y, c, n);
                    for (int i = 2; i < inputBuffers.size(); ++i)
                        topExpr += inputBuffers[i](x, y, c, n);
                }
                else
                {
                    topExpr = coeffs[0] * inputBuffers[0](x, y, c, n) +
                              coeffs[1] * inputBuffers[1](x, y, c, n);
                    for (int i = 2; i < inputBuffers.size(); ++i)
                        topExpr += coeffs[i] * inputBuffers[i](x, y, c, n);
                }
                break;
            case PROD:
                topExpr = inputBuffers[0](x, y, c, n) *
                          inputBuffers[1](x, y, c, n);
                for (int i = 2; i < inputBuffers.size(); ++i)
                    topExpr *= inputBuffers[i](x, y, c, n);
                break;
            case DIV:
                topExpr = inputBuffers[0](x, y, c, n) /
                          inputBuffers[1](x, y, c, n);
                for (int i = 2; i < inputBuffers.size(); ++i)
                    topExpr /= inputBuffers[i](x, y, c, n);
                break;
            case MAX:
                topExpr = max(inputBuffers[0](x, y, c, n),
                              inputBuffers[1](x, y, c, n));
                for (int i = 2; i < inputBuffers.size(); ++i)
                    topExpr = max(topExpr, inputBuffers[i](x, y, c, n));
                break;
            default:
                return Ptr<BackendNode>();
        }
        top(x, y, c, n) = topExpr;
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif  // HAVE_HALIDE
        return Ptr<BackendNode>();
    }

#ifdef HAVE_INF_ENGINE
    // Map this layer to an Inference Engine (NN Builder API) Eltwise layer.
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        InferenceEngine::Builder::EltwiseLayer ieLayer(name);

        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));

        if (op == SUM)
            ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::SUM);
        else if (op == PROD)
            ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MUL);
        else if (op == DIV)
            ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::DIV);
        else if (op == MAX)
            ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MAX);
        else
            CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");

        InferenceEngine::Builder::Layer l = ieLayer;
        if (!coeffs.empty())
            l.getParameters()["coeff"] = coeffs;

        return Ptr<BackendNode>(new InfEngineBackendNode(l));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    // Build an nGraph subgraph: optionally scale each input by its
    // coefficient, then fold inputs pairwise with the selected op.
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto curr_node = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        if (!coeffs.empty()) {
            auto coeff = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &coeffs[0]);
            curr_node = std::make_shared<ngraph::op::v1::Multiply>(curr_node, coeff, ngraph::op::AutoBroadcastType::NUMPY);
        }

        for (size_t i = 1; i < nodes.size(); i++)
        {
            auto next_node = nodes[i].dynamicCast<InfEngineNgraphNode>()->node;
            if (!coeffs.empty()) {
                auto coeff = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &coeffs[i]);
                next_node = std::make_shared<ngraph::op::v1::Multiply>(next_node, coeff, ngraph::op::AutoBroadcastType::NUMPY);
            }
            switch (op) {
            case SUM:  curr_node = std::make_shared<ngraph::op::v1::Add>(curr_node, next_node); break;
            case PROD: curr_node = std::make_shared<ngraph::op::v1::Multiply>(curr_node, next_node); break;
            case DIV:  curr_node = std::make_shared<ngraph::op::v1::Divide>(curr_node, next_node); break;
            case MAX:  curr_node = std::make_shared<ngraph::op::v1::Maximum>(curr_node, next_node); break;
            default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
            }
        }
        return Ptr<BackendNode>(new InfEngineNgraphNode(curr_node));
    }
#endif  // HAVE_DNN_NGRAPH

    // One op per element per input (approximation).
    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
        CV_UNUSED(outputs);  // suppress unused variable warning
        CV_Assert(inputs.size());

        // FIXIT: handle inputs with different number of channels
        long flops = inputs.size() * total(inputs[0]);

        return flops;
    }

    // Fuse a following activation layer into this one (applied in-place after
    // the element-wise op). Only one activation can be fused.
    bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
    {
        if (activ.empty() || layer.empty())
        {
            activ = layer;
            return !activ.empty();
        }
        else
            return false;
    }

    Ptr<ActivationLayer> activ;  // fused activation, may be empty
};
// Factory: construct an Eltwise layer from serialized parameters.
Ptr<EltwiseLayer> EltwiseLayer::create(const LayerParams& params)
{
    return makePtr<EltwiseLayerImpl>(params);
}
}
}

View File

@@ -0,0 +1,238 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <float.h>
#include <algorithm>
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/reshape.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Flatten layer: collapses the axes in [axis, end_axis] of each input blob
// into a single dimension. The forward pass is a zero-copy reshape whenever
// input and output share the underlying buffer.
class FlattenLayerImpl CV_FINAL : public FlattenLayer
{
public:
    // Read the axis range from the serialized parameters.
    // Negative values are resolved against the input rank in finalize()/
    // getMemoryShapes() via clamp().
    FlattenLayerImpl(const LayerParams &params)
    {
        _startAxis = params.get<int>("axis", 1);
        _endAxis = params.get<int>("end_axis", -1);
        setParamsFrom(params);
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine());
    }

    // Compute the flattened output shape: dims before startAxis are kept,
    // [startAxis, endAxis] collapse into one dimension, dims after endAxis
    // are kept. All inputs must share the same shape.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() > 0);
        for (size_t i = 1; i < inputs.size(); i++)
        {
            CV_Assert(inputs[i] == inputs[0]);
        }

        int numAxes = inputs[0].size();
        int startAxis = clamp(_startAxis, numAxes);  // resolve negative axes
        int endAxis = clamp(_endAxis, numAxes);

        CV_Assert(startAxis >= 0);
        CV_Assert(endAxis >= startAxis && endAxis < (int)numAxes);

        size_t flattenedDimensionSize = total(inputs[0], startAxis, endAxis + 1);

        MatShape outputShapeVec;
        for (int i = 0; i < startAxis; i++)
        {
            outputShapeVec.push_back(inputs[0][i]);
        }
        outputShapeVec.push_back(flattenedDimensionSize);
        for (size_t i = endAxis + 1; i < numAxes; i++)
        {
            outputShapeVec.push_back(inputs[0][i]);
        }
        // NOTE(review): output rank is capped at 4 here — presumably a
        // limitation of downstream consumers/backends; confirm before lifting.
        CV_Assert(outputShapeVec.size() <= 4);

        outputs.resize(inputs.size(), outputShapeVec);

        return true;
    }

    // Resolve negative axis indices once the actual input rank is known.
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);

        int numAxes = inputs[0].dims;
        _startAxis = clamp(_startAxis, numAxes);
        _endAxis = clamp(_endAxis, numAxes);
    }

#ifdef HAVE_OPENCL
    // OpenCL path: flatten is a metadata-only reshape of each input UMat.
    bool forward_ocl(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
    {
        std::vector<UMat> inpvec;
        std::vector<UMat> outputs;

        inputs_arr.getUMatVector(inpvec);
        outputs_arr.getUMatVector(outputs);

        std::vector<UMat*> inputs(inpvec.size());
        for (int i = 0; i < inpvec.size(); i++)
            inputs[i] = &inpvec[i];

        for (size_t i = 0; i < inputs.size(); i++)
        {
            MatShape outShape = shape(outputs[i]);
            UMat& output = outputs_arr.getUMatRef(i);
            output = inputs[i]->reshape(1, (int)outShape.size(), &outShape[0]);  // zero-copy
        }

        return true;
    }
#endif

    // CPU forward: copy only when input and output buffers differ;
    // otherwise the reshape already aliased the data.
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
                   outputs_arr.isUMatVector(),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        for (size_t i = 0; i < inputs.size(); i++)
        {
            MatShape outShape = shape(outputs[i]);
            if (inputs[i].data != outputs[i].data)
            {
                inputs[i].reshape(1, (int)outShape.size(), &outShape[0]).copyTo(outputs[i]);
            }
        }
    }

#ifdef HAVE_CUDA
    // CUDA path: flatten is implemented as the generic reshape primitive.
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
    }
#endif

#ifdef HAVE_INF_ENGINE
    // Map to the Inference Engine (NN Builder API) generic "Flatten" layer.
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        InferenceEngine::Builder::Layer ieLayer(name);
        ieLayer.setName(name);
        ieLayer.setType("Flatten");
        ieLayer.getParameters()["axis"] = (size_t)_startAxis;
        ieLayer.getParameters()["end_axis"] = _endAxis;  // Do not cast to size_t because it might be negative.
        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
        ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    // nGraph path: compute the flattened shape from the static input shape
    // and emit a Reshape node with a constant target shape.
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        std::vector<size_t> dims = ieInpNode->get_shape();

        int numAxes = dims.size();
        int startAxis = clamp(_startAxis, numAxes);
        int endAxis = clamp(_endAxis, numAxes);

        CV_Assert(startAxis >= 0);
        CV_Assert(endAxis >= startAxis && endAxis < numAxes);

        int64_t flattenedDimensionSize = std::accumulate(dims.begin() + startAxis,
                                         dims.begin() + endAxis + 1, 1, std::multiplies<size_t>());

        std::vector<int64_t> outputShapeVec(dims.begin(), dims.begin() + startAxis);
        outputShapeVec.push_back(flattenedDimensionSize);
        outputShapeVec.insert(outputShapeVec.end(), dims.begin() + endAxis + 1, dims.end());

        auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
                     ngraph::Shape({outputShapeVec.size()}), outputShapeVec.data());
        auto reshape = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, shape, true);
        return Ptr<BackendNode>(new InfEngineNgraphNode(reshape));
    }
#endif  // HAVE_DNN_NGRAPH

    // Axis range to flatten; negative values are permitted until finalize().
    int _startAxis;
    int _endAxis;
};
// Factory method: builds a Flatten layer instance from serialized parameters.
Ptr<FlattenLayer> FlattenLayer::create(const LayerParams& params)
{
    Ptr<FlattenLayer> layer(new FlattenLayerImpl(params));
    return layer;
}
}
}

View File

@@ -0,0 +1,538 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
using namespace cv::dnn::ocl4dnn;
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/inner_product.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Fully-connected (InnerProduct) layer implementation.
// The input is viewed as a 2D matrix [outerSize x innerSize] (axes before
// `axis` form the outer part) and the layer computes dst = src * W^T (+ bias),
// optionally followed by a fused activation. Dispatches to OpenCL, CUDA,
// Halide and Inference Engine / nGraph backends when available.
class FullyConnectedLayerImpl CV_FINAL : public InnerProductLayer
{
public:
    enum { VEC_ALIGN = 8 };  // weight rows are zero-padded to a multiple of 8 floats for SIMD

#ifdef HAVE_OPENCL
    Ptr<OCL4DNNInnerProduct<float> > innerProductOp;  // cached OpenCL GEMM primitive
    std::vector<UMat> umat_blobs;                     // FP32 device copies of the blobs
    std::vector<UMat> half_blobs;                     // FP16 device copies (half-precision targets)
#endif

    // Reads num_output / bias_term / axis from params, reshapes the weight
    // blob to [numOutput x innerSize] and, when the row length is not a
    // multiple of VEC_ALIGN, re-allocates it inside a zero-padded buffer so
    // vectorized kernels may read whole aligned chunks past the row end.
    FullyConnectedLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        CV_Assert(1 <= blobs.size() && blobs.size() <= 2);

        int numOutput = params.get<int>("num_output");
        int innerSize = (int)blobs[0].total() / numOutput;
        bias = params.get<bool>("bias_term", true);
        axis = params.get<int>("axis", 1);
        CV_Assert(blobs[0].dims >= 2 && (size_t)(innerSize * numOutput) == blobs[0].total());
        CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutput == blobs[1].total()));

        weightsMat = blobs[0] = blobs[0].reshape(1, numOutput);
        int vecsize = weightsMat.cols;
        if( vecsize % VEC_ALIGN != 0 )
        {
            // weightsMat becomes a view of the first `vecsize` columns of the
            // aligned buffer; the padding columns stay zero.
            int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN);
            Mat weightsBuf(weightsMat.rows, vecsize_aligned, weightsMat.type());
            Mat wpadding = weightsBuf.colRange(vecsize, vecsize_aligned);
            wpadding.setTo(Scalar::all(0.));
            weightsMat = weightsBuf.colRange(0, vecsize);
            blobs[0].copyTo(weightsMat);
        }

        if (bias)
            biasMat = blobs[1] = blobs[1].reshape(1, 1);
        else
            biasMat = Mat::zeros(1, numOutput, weightsMat.type());  // zero bias when bias_term=false
    }

    // Output shape keeps the input axes before the (clamped) axis and appends
    // a single dimension of numOutput.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == 1);
        CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
        CV_Assert(blobs[0].dims == 2);

        int cAxis = clamp(axis, inputs[0]);
        int numOutput = blobs[0].size[0];
        MatShape outShape(cAxis + 1);
        for (int i = 0; i < cAxis; ++i)
            outShape[i] = inputs[0][i];
        outShape.back() = numOutput;

        outputs.resize(inputs.size(), outShape);
        CV_Assert(!bias || (size_t)numOutput == blobs[1].total());
        return false;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        // Halide and Inference Engine paths only handle the default axis == 1.
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1) ||
               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && axis == 1);
    }

    // Fuses a following activation layer into this one; also allows resetting
    // it with an empty layer. Returns whether an activation is attached.
    virtual bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
    {
        if (activ.empty() || layer.empty())
        {
            activ = layer;
            return !activ.empty();
        }
        else
            return false;
    }

    // CPU kernel: dst = src * W^T + b, parallelized by splitting the flat
    // (#samples x #outputs) work range into `nstripes` stripes.
    class FullyConnected : public ParallelLoopBody
    {
    public:
        FullyConnected() : srcMat(0), weights(0), biasMat(0), activ(0), dstMat(0), nstripes(0), useAVX(false), useAVX2(false), useAVX512(false) {}

        static void run(const Mat& srcMat, const Mat& weights, const Mat& biasMat,
                        Mat& dstMat, const ActivationLayer* activ, int nstripes)
        {
            CV_Assert( srcMat.dims == 2 && srcMat.cols == weights.cols &&
                       dstMat.rows == srcMat.rows && dstMat.cols == weights.rows &&
                       srcMat.type() == weights.type() && weights.type() == dstMat.type() &&
                       srcMat.type() == CV_32F &&
                       (biasMat.empty() || (biasMat.type() == srcMat.type() &&
                                            biasMat.isContinuous() && (int)biasMat.total() == dstMat.cols)) );

            FullyConnected p;

            p.srcMat = &srcMat;
            p.weights = &weights;
            p.biasMat = &biasMat;
            p.dstMat = &dstMat;
            p.nstripes = nstripes;
            p.activ = activ;
            // CPU capability flags are probed once per run, not per stripe.
            p.useAVX = checkHardwareSupport(CPU_AVX);
            p.useAVX2 = checkHardwareSupport(CPU_AVX2);
            p.useAVX512 = CV_CPU_HAS_SUPPORT_AVX512_SKX;

            parallel_for_(Range(0, nstripes), p, nstripes);
        }

        void operator()(const Range& r) const CV_OVERRIDE
        {
            int valign = FullyConnectedLayerImpl::VEC_ALIGN;
            int nsamples = srcMat->rows;
            int nw0 = weights->rows;
            int k, vecsize = srcMat->cols;
            int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN);
            // Work items are (sample, output) pairs laid out in a flat range.
            size_t total = (size_t)nsamples*nw0;
            size_t stripeSize = (total + nstripes - 1)/nstripes;
            size_t stripeStart = r.start*stripeSize;
            size_t stripeEnd = r.end == nstripes ? total : std::min(r.end*stripeSize, total);
            size_t wstep = weights->step1();
            // Aligned, zero-padded scratch copy of the current input row so
            // the SIMD loops can read past the logical row end safely.
            AutoBuffer<float> srcbuf(vecsize_aligned + valign);
            float* sptr = alignPtr(srcbuf.data(), (int)(valign*sizeof(float)));

            for( k = vecsize; k < vecsize_aligned; k++ )
                sptr[k] = 0.f;

            for( size_t ofs = stripeStart; ofs < stripeEnd; )
            {
                // Decode flat offset into (sample row, first output index).
                int sampleIdx = (int)(ofs / nw0);
                int delta = (int)(ofs - (size_t)sampleIdx*nw0);
                const float* sptr_ = srcMat->ptr<float>(sampleIdx);
                const float* wptr = weights->ptr<float>(delta);
                float* dptr = dstMat->ptr<float>(sampleIdx) + delta;
                const float* biasptr = biasMat->ptr<float>() + delta;
                // Outputs to produce for this sample within this stripe.
                int nw = std::min(nw0 - delta, (int)(stripeEnd - ofs));

                memcpy(sptr, sptr_, vecsize*sizeof(sptr[0]));

            #if CV_TRY_AVX512_SKX
                if( useAVX512 )
                    opt_AVX512_SKX::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
                else
            #endif
            #if CV_TRY_AVX2
                if( useAVX2 )
                    opt_AVX2::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
                else
            #endif
            #if CV_TRY_AVX
                if( useAVX )
                    opt_AVX::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
                else
            #endif
                {
                    int i = 0;

            #if CV_SIMD128
                    // Universal-intrinsics path: 4 output neurons per iteration.
                    for( ; i <= nw - 4; i += 4, wptr += 4*wstep )
                    {
                        v_float32x4 vs0 = v_setall_f32(0.f), vs1 = v_setall_f32(0.f);
                        v_float32x4 vs2 = v_setall_f32(0.f), vs3 = v_setall_f32(0.f);

                        for( k = 0; k < vecsize; k += 4 )
                        {
                            v_float32x4 v = v_load_aligned(sptr + k);
                            vs0 += v*v_load_aligned(wptr + k);
                            vs1 += v*v_load_aligned(wptr + wstep + k);
                            vs2 += v*v_load_aligned(wptr + wstep*2 + k);
                            vs3 += v*v_load_aligned(wptr + wstep*3 + k);
                        }

                        v_float32x4 s = v_reduce_sum4(vs0, vs1, vs2, vs3);
                        s += v_load(biasptr + i);
                        v_store(dptr + i, s);
                    }
            #endif

                    // Scalar tail (and full fallback when SIMD is unavailable).
                    for( ; i < nw; i++, wptr += wstep )
                    {
                        float s0=biasptr[i];

                        for( k = 0; k < vecsize; k++ )
                        {
                            float v = sptr[k];
                            s0 += v*wptr[k];
                        }
                        dptr[i] = s0;
                    }
                }

                // Fused activation applied in place on the freshly written span.
                if(activ)
                    activ->forwardSlice(dptr, dptr, 1, 1, delta, delta + nw);

                ofs += nw;
            }
        }

        const Mat *srcMat, *weights, *biasMat;
        const ActivationLayer* activ;  // optional fused activation (may be null)
        Mat* dstMat;
        int nstripes;
        bool useAVX;
        bool useAVX2;
        bool useAVX512;
    };

#ifdef HAVE_OPENCL
    virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
    {
        // Drop cached OpenCL state; it is rebuilt lazily on the next forward_ocl().
        innerProductOp.release();
        umat_blobs.clear();
        half_blobs.clear();
    }

    // OpenCL path: tries the OCL4DNN inner-product primitive first and falls
    // back to a plain GEMM on failure. Returns true when the outputs were
    // produced on the device.
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        bool use_half = (inps.depth() == CV_16S);  // FP16 tensors are transported as CV_16S
        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        int axisCan = clamp(axis, inputs[0].dims);
        int numOutput = blobs[0].size[0];
        int innerSize = blobs[0].size[1];
        int outerSize = total(shape(inputs[0]), 0, axisCan);
        bool ret = true;

        if (innerProductOp.empty())
        {
            // First call: upload the blobs and create the OCL4DNN primitive.
            size_t n = blobs.size();
            umat_blobs.resize(n);
            for (int i = 0; i < n; i++) blobs[i].copyTo(umat_blobs[i]);

            OCL4DNNInnerProductConfig config;
            config.num_output = numOutput;
            config.bias_term = bias;
            config.M = outerSize;
            config.K = innerSize;
            config.use_half = use_half;

            if (use_half)
            {
                half_blobs.resize(umat_blobs.size());
                for (int i = 0; i < umat_blobs.size(); i++)
                {
                    if (!umat_blobs[i].empty())
                        convertFp16(umat_blobs[i], half_blobs[i]);
                }
            }

            innerProductOp = Ptr<OCL4DNNInnerProduct<float> >(new OCL4DNNInnerProduct<float>(config));
        }

        for (size_t i = 0; i < inputs.size(); i++)
        {
            MatShape inshape, outshape;
            inshape = shape(outerSize, innerSize);
            outshape = shape(outerSize, numOutput);

            UMat srcMat, dstMat;
            srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
            dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);

            if (!innerProductOp->Forward(srcMat, (use_half) ? half_blobs[0] : umat_blobs[0],
                                         (bias) ? (use_half ? half_blobs[1] : umat_blobs[1]) : UMat(),
                                         dstMat))
            {
                ret = false;
                break;
            }

            // NOTE(review): for FP32 with outerSize > 1 the bias row is added
            // here via gemm rather than inside the primitive -- presumably the
            // primitive only fuses bias for the other configurations; confirm
            // against OCL4DNNInnerProduct.
            if (!use_half && bias && (outerSize > 1))
            {
                UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
                UMat& biases = umat_blobs[1];
                cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
            }
        }

        if (ret) return true;

        // Fallback: dst = src * W^T (+ bias) via cv::gemm, converting FP16
        // buffers to FP32 around the computation.
        UMat& weights = umat_blobs[0];
        for (size_t i = 0; i < inputs.size(); i++)
        {
            MatShape inshape, outshape;
            inshape = shape(outerSize, innerSize);
            outshape = shape(outerSize, numOutput);

            UMat srcMat, dstMat, srcMat_fp32, dstMat_fp32;
            srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
            dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);

            if (use_half)
            {
                convertFp16(srcMat, srcMat_fp32);
                convertFp16(dstMat, dstMat_fp32);
            }
            else
            {
                srcMat_fp32 = srcMat;
                dstMat_fp32 = dstMat;
            }

            cv::gemm(srcMat_fp32, weights, 1, noArray(), 0, dstMat_fp32, GEMM_2_T);

            if (bias)
            {
                UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
                UMat& biases = umat_blobs[1];
                cv::gemm(biasOnesMat, biases, 1, dstMat_fp32, 1, dstMat_fp32, 0);
            }

            if (use_half)
            {
                convertFp16(srcMat_fp32, srcMat);
                convertFp16(dstMat_fp32, dstMat);
            }
        }

        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        // FP16 inputs on the CPU path go through the generic fallback
        // (convert to FP32, forward, convert back).
        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> input, output;
        inputs_arr.getMatVector(input);
        outputs_arr.getMatVector(output);

        int axisCan = clamp(axis, input[0].dims);
        int outerSize = input[0].total(0, axisCan);

        for (size_t i = 0; i < input.size(); i++)
        {
            // Flatten each tensor to [outerSize x innerSize] for the 2D kernel.
            Mat srcMat = input[i].reshape(1, outerSize);
            Mat dstMat = output[i].reshape(1, outerSize);

            const int nstripes = getNumThreads();
            FullyConnected::run(srcMat, weightsMat, biasMat, dstMat, activ.get(), nstripes);
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();

        // Axes from this index onwards are flattened before the device GEMM.
        auto flatten_start_axis = clamp(axis, input_wrapper->getRank());

        auto biasMat_ = bias ? biasMat : Mat();
        return make_cuda_node<cuda4dnn::InnerProductOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), flatten_start_axis, weightsMat, biasMat_);
    }
#endif

    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        int inW, inH, inC, inN, outC = blobs[0].size[0];
        Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
        getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
        auto weights = wrapToHalideBuffer(blobs[0], {inW, inH, inC, outC});

        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        // Reduce over the whole spatial x channel extent of the input.
        Halide::RDom r(0, inW, 0, inH, 0, inC);
        Halide::Expr topExpr = sum(inputBuffer(r.x, r.y, r.z, n) *
                                   weights(r.x, r.y, r.z, c));
        if (bias)
        {
            Halide::Buffer<float> bias = wrapToHalideBuffer(blobs[1], {outC});
            topExpr += bias(c);
        }
        top(x, y, c, n) = topExpr;
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif  // HAVE_HALIDE
        return Ptr<BackendNode>();
    }

#ifdef HAVE_INF_ENGINE
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        InferenceEngine::Builder::FullyConnectedLayer ieLayer(name);

        const int outNum = blobs[0].size[0];
        ieLayer.setOutputNum(outNum);

        InferenceEngine::Builder::Layer l = ieLayer;
        addConstantData("weights", wrapToInfEngineBlob(blobs[0], {(size_t)blobs[0].size[0], (size_t)blobs[0].size[1], 1, 1}, InferenceEngine::Layout::OIHW), l);
        if (bias)
            addConstantData("biases", wrapToInfEngineBlob(blobs[1], {(size_t)outNum}, InferenceEngine::Layout::C), l);

        return Ptr<BackendNode>(new InfEngineBackendNode(l));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    // nGraph path: reshape the input to [batch x innerSize], MatMul with the
    // transposed weights and broadcast-add the bias row.
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        int batch = ieInpNode->get_shape()[0];
        std::vector<size_t> data = {(size_t)batch, (size_t)blobs[0].size[1]};
        auto new_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, data.data());
        auto inp = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, new_shape, true);

        std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
        auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data);
        // transpose_b = true: weights are stored [numOutput x innerSize].
        auto matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, false, true);

        if (bias) {
            // blobs[1] was reshaped to a single row in the constructor,
            // hence size[1] is the number of outputs.
            auto bias_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
                                              ngraph::Shape{(size_t)blobs[1].size[1]}, blobs[1].data);
            auto fc = std::make_shared<ngraph::op::v1::Add>(matmul, bias_node, ngraph::op::AutoBroadcastType::NUMPY);
            return Ptr<BackendNode>(new InfEngineNgraphNode(fc));
        }
        return Ptr<BackendNode>(new InfEngineNgraphNode(matmul));
    }
#endif  // HAVE_DNN_NGRAPH

    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
        CV_UNUSED(inputs); // suppress unused variable warning

        long flops = 0;
        int innerSize = blobs[0].size[1];
        // Heuristic: ~3 ops per weight per output element.
        for(int i = 0; i < outputs.size(); i++)
        {
            flops += CV_BIG_INT(3)*innerSize*total(outputs[i]);
        }

        return flops;
    }

    bool bias;                   // whether a bias term is applied
    Mat weightsMat, biasMat;     // [numOutput x innerSize] weights; 1 x numOutput bias row
    Ptr<ActivationLayer> activ;  // fused activation layer, may be empty
};
// Factory method: builds an InnerProduct (fully-connected) layer instance.
Ptr<InnerProductLayer> InnerProductLayer::create(const LayerParams& params)
{
    Ptr<InnerProductLayer> layer(new FullyConnectedLayerImpl(params));
    return layer;
}
}
}

View File

@@ -0,0 +1,243 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
namespace cv
{
namespace dnn
{
namespace util
{
// Concatenates a parameter-name base with a suffix (e.g. "kernel" + "_h").
std::string makeName(const std::string& str1, const std::string& str2)
{
    std::string joined(str1);
    joined += str2;
    return joined;
}
// Reads a (height, width)-style parameter from LayerParams into `parameter`.
// Lookup priority: the explicit "<nameBase>_h"/"<nameBase>_w" pair first, then
// the list/scalar parameter `nameAll` (falling back to `nameBase` when nameAll
// is empty); a single scalar is broadcast to both dimensions. Returns false
// only when the parameter is absent and no default was requested.
bool getParameter(const LayerParams &params, const std::string& nameBase, const std::string& nameAll,
                  std::vector<size_t>& parameter, bool hasDefault = false, const std::vector<size_t>& defaultValue = std::vector<size_t>(2, 0))
{
    std::string nameH = makeName(nameBase, std::string("_h"));
    std::string nameW = makeName(nameBase, std::string("_w"));
    std::string nameAll_ = nameAll;
    if (nameAll_ == "")
        nameAll_ = nameBase;

    if (params.has(nameH) && params.has(nameW))
    {
        // Explicit per-dimension values win; both must be non-negative.
        CV_Assert(params.get<int>(nameH) >= 0 && params.get<int>(nameW) >= 0);
        parameter.push_back(params.get<int>(nameH));
        parameter.push_back(params.get<int>(nameW));
        return true;
    }
    else
    {
        if (params.has(nameAll_))
        {
            DictValue param = params.get(nameAll_);
            for (int i = 0; i < param.size(); i++) {
                CV_Assert(param.get<int>(i) >= 0);
                parameter.push_back(param.get<int>(i));
            }
            if (parameter.size() == 1)
                parameter.resize(2, parameter[0]);  // broadcast scalar to h and w
            return true;
        }
        else
        {
            if (hasDefault)
            {
                parameter = defaultValue;
                return true;
            }
            else
            {
                return false;
            }
        }
    }
}
// Reads the kernel spatial dimensions ("kernel_size", or the "kernel_h" /
// "kernel_w" pair) into `kernel` and validates that every dimension is
// strictly positive. Raises CV_Error(StsBadArg) when no kernel parameter is
// present at all.
void getKernelSize(const LayerParams &params, std::vector<size_t>& kernel)
{
    if (!util::getParameter(params, "kernel", "kernel_size", kernel))
        CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");

    // size_t index instead of the previous `int` avoids the signed/unsigned
    // comparison against kernel.size().
    for (size_t i = 0; i < kernel.size(); i++)
        CV_Assert(kernel[i] > 0);
}
// Fills pads_begin/pads_end and strides from LayerParams.
// Explicit pad_t/pad_l/pad_b/pad_r take precedence; otherwise the "pad"
// parameter is read with default 0. A "pad" list of 4+ entries is split into
// begin-pads (first half) and end-pads (second half); shorter lists mean
// symmetric padding. Also reads the optional "pad_mode" string and validates
// that all strides are positive.
void getStrideAndPadding(const LayerParams &params, std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end,
                         std::vector<size_t>& strides, cv::String& padMode, size_t kernel_size = 2)
{
    if (params.has("pad_l") && params.has("pad_t") && params.has("pad_r") && params.has("pad_b")) {
        CV_Assert(params.get<int>("pad_t") >= 0 && params.get<int>("pad_l") >= 0 &&
                  params.get<int>("pad_b") >= 0 && params.get<int>("pad_r") >= 0);
        pads_begin.push_back(params.get<int>("pad_t"));
        pads_begin.push_back(params.get<int>("pad_l"));
        pads_end.push_back(params.get<int>("pad_b"));
        pads_end.push_back(params.get<int>("pad_r"));
    }
    else {
        util::getParameter(params, "pad", "pad", pads_begin, true, std::vector<size_t>(kernel_size, 0));
        if (pads_begin.size() < 4)
            pads_end = pads_begin;  // symmetric padding
        else
        {
            // First half of the list = begin pads, second half = end pads.
            pads_end = std::vector<size_t>(pads_begin.begin() + pads_begin.size() / 2, pads_begin.end());
            pads_begin.resize(pads_begin.size() / 2);
        }
        CV_Assert(pads_begin.size() == pads_end.size());
    }

    util::getParameter(params, "stride", "stride", strides, true, std::vector<size_t>(kernel_size, 1));

    padMode = "";
    if (params.has("pad_mode"))
    {
        padMode = params.get<String>("pad_mode");
    }

    for (int i = 0; i < strides.size(); i++)
        CV_Assert(strides[i] > 0);
}
}
// Parses pooling kernel/stride/padding parameters. In global-pooling mode the
// kernel is implied by the input size, so an explicit kernel is an error and
// pads/strides must be their neutral values (0 / 1).
void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, bool &globalPooling,
                            std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end,
                            std::vector<size_t>& strides, cv::String &padMode)
{
    globalPooling = params.has("global_pooling") &&
                    params.get<bool>("global_pooling");

    if (globalPooling)
    {
        util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode);
        if(params.has("kernel_h") || params.has("kernel_w") || params.has("kernel_size"))
        {
            CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified");
        }
        for (int i = 0; i < pads_begin.size(); i++) {
            if (pads_begin[i] != 0 || pads_end[i] != 0)
                CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0");
        }
        for (int i = 0; i < strides.size(); i++) {
            if (strides[i] != 1)
                CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1");
        }
    }
    else
    {
        util::getKernelSize(params, kernel);
        util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size());
    }
}
// Parses convolution kernel, stride, padding, dilation and "adj" parameters
// (the latter presumably the output-padding adjustment used by
// deconvolution). Dilation defaults to 1 and "adj" to 0 per kernel dimension;
// dilations must be strictly positive.
void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
                                std::vector<size_t>& pads_end, std::vector<size_t>& strides,
                                std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads)
{
    util::getKernelSize(params, kernel);
    util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size());
    util::getParameter(params, "dilation", "dilation", dilations, true, std::vector<size_t>(kernel.size(), 1));
    util::getParameter(params, "adj", "adj", adjust_pads, true, std::vector<size_t>(kernel.size(), 0));

    for (int i = 0; i < dilations.size(); i++)
        CV_Assert(dilations[i] > 0);
}
// From TensorFlow code:
// Total padding on rows and cols is
// Pr = (R' - 1) * S + Kr - R
// Pc = (C' - 1) * S + Kc - C
// where (R', C') are output dimensions, (R, C) are input dimensions, S
// is stride, (Kr, Kc) are filter dimensions.
// We pad Pr/2 on the left and Pr - Pr/2 on the right, Pc/2 on the top
// and Pc - Pc/2 on the bottom. When Pr or Pc is odd, this means
// we pad more on the right and bottom than on the top and left.
// Computes per-axis output sizes for conv/pooling under the TensorFlow-style
// SAME / VALID padding modes (see the derivation in the comment above);
// throws for any other pad mode. NOTE(review): the expressions mix int with
// size_t, so the arithmetic is performed unsigned -- an input smaller than
// the dilated kernel extent would wrap rather than yield a negative size;
// callers presumably guarantee this cannot happen.
void getConvPoolOutParams(const std::vector<int>& inp, const std::vector<size_t>& kernel,
                          const std::vector<size_t>& stride, const String &padMode,
                          const std::vector<size_t>& dilation, std::vector<int>& out)
{
    if (padMode == "VALID")
    {
        // out = ceil((inp - dilated_kernel_extent + 1) / stride)
        for (int i = 0; i < inp.size(); i++)
            out.push_back((inp[i] - dilation[i] * (kernel[i] - 1) - 1 + stride[i]) / stride[i]);
    }
    else if (padMode == "SAME")
    {
        // out = ceil(inp / stride)
        for (int i = 0; i < inp.size(); i++)
            out.push_back((inp[i] - 1 + stride[i]) / stride[i]);
    }
    else
    {
        CV_Error(Error::StsError, "Unsupported padding mode");
    }
}
// Computes per-axis begin/end paddings for the SAME and VALID modes; for any
// other (explicit-padding) mode the pads are left untouched. VALID always
// yields zero padding. SAME assigns floor(total_pad / 2) to both sides, and
// only when stride <= kernel (larger strides keep zero padding).
void getConvPoolPaddings(const std::vector<int>& inp, const std::vector<size_t>& kernel,
                         const std::vector<size_t>& strides, const String &padMode,
                         std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end)
{
    if (padMode == "SAME" || padMode == "VALID")
    {
        pads_begin.assign(kernel.size(), 0);
        pads_end.assign(kernel.size(), 0);
    }
    if (padMode == "SAME")
    {
        CV_Assert_N(kernel.size() == strides.size(), kernel.size() == inp.size());
        for (int i = 0; i < pads_begin.size(); i++) {
            // There are test cases with stride > kernel.
            if (strides[i] <= kernel[i])
            {
                // Half of the total SAME padding, rounded down, on each side.
                int pad = (kernel[i] - 1 - (inp[i] - 1 + strides[i]) % strides[i]) / 2;
                pads_begin[i] = pads_end[i] = pad;
            }
        }
    }
}
}
}

View File

@@ -0,0 +1,79 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#define __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#define CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
// dispatched AVX/AVX2 optimizations
#include "./layers_common.simd.hpp"
#include "layers/layers_common.simd_declarations.hpp"
#undef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
#ifdef HAVE_OPENCL
#include "../ocl4dnn/include/ocl4dnn.hpp"
#endif
namespace cv
{
namespace dnn
{
void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations,
cv::String &padMode, std::vector<size_t>& adjust_pads);
void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, bool &globalPooling,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);
void getConvPoolOutParams(const std::vector<int>& inp, const std::vector<size_t>& kernel,
const std::vector<size_t>& stride, const String &padMode,
const std::vector<size_t>& dilation, std::vector<int>& out);
void getConvPoolPaddings(const std::vector<int>& inp, const std::vector<size_t>& kernel,
const std::vector<size_t>& strides, const String &padMode,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end);
}
}
#endif

View File

@@ -0,0 +1,485 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/hal/intrin.hpp"
namespace cv {
namespace dnn {
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
void fastConv( const float* weights, size_t wstep, const float* bias,
const float* rowbuf, float* output, const int* outShape,
int blockSize, int vecsize, int vecsize_aligned,
const float* relu, bool initOutput );
void fastGEMM1T( const float* vec, const float* weights,
size_t wstep, const float* bias,
float* dst, int nvecs, int vecsize );
void fastGEMM( const float* aptr, size_t astep, const float* bptr,
size_t bstep, float* cptr, size_t cstep,
int ma, int na, int nb );
#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_AVX
#if !CV_FMA3 // AVX workaround
#undef _mm256_fmadd_ps
#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(c, _mm256_mul_ps(a, b))
#endif
// Computes a block of convolution outputs as dot products between weight rows
// and the im2row-transformed input buffer, three output channels at a time,
// with an optional fused (leaky-)ReLU. AVX2 implementation with an AVX-512
// fast path for long dot products.
//
// weights/wstep   : weight matrix, one row per output channel, row stride wstep (floats)
// bias            : per-output-channel bias values
// rowbuf          : im2row buffer; blockSize rows of vecsize_aligned floats each
// output/outShape : output tensor (N,C,H,W); one plane is outShape[2]*outShape[3] floats
// blockSize       : number of output spatial positions processed in this call
// vecsize         : logical dot-product length; vecsize_aligned is the padded
//                   rowbuf row stride (the SIMD loops assume 8-float alignment)
// relu            : if non-NULL, per-channel negative slopes of a leaky ReLU
// initOutput      : true -> out = bias + dot; false -> out += dot (accumulate)
void fastConv( const float* weights, size_t wstep, const float* bias,
               const float* rowbuf, float* output, const int* outShape,
               int blockSize, int vecsize, int vecsize_aligned,
               const float* relu, bool initOutput )
{
    int outCn = outShape[1];
    size_t outPlaneSize = outShape[2]*outShape[3];
    float r0 = 1.f, r1 = 1.f, r2 = 1.f;
    __m128 vr0 = _mm_set1_ps(1.f), vr1 = vr0, vr2 = vr0, z = _mm_setzero_ps();

    // now compute dot product of the weights
    // and im2row-transformed part of the tensor
    for( int i = 0; i < outCn; i += 3 )
    {
        const float* wptr0 = weights + i*wstep;
        const float* wptr1 = wptr0 + wstep;
        const float* wptr2 = wptr1 + wstep;
        float* outptr0 = output + i*outPlaneSize;
        float* outptr1 = outptr0 + outPlaneSize;
        float* outptr2 = outptr1 + outPlaneSize;
        // NOTE(review): bias[i+1]/bias[i+2] are read before the range check
        // below; the caller is presumably expected to pad the bias buffer so
        // these reads stay in bounds — confirm against the call site.
        float bias0 = bias[i], bias1 = bias[i+1], bias2 = bias[i+2];

        // When outCn is not a multiple of 3, alias the out-of-range rows onto
        // the last valid one so the same 3-row kernel can be reused; the
        // duplicated stores just rewrite identical values.
        if( i+2 >= outCn )
        {
            wptr2 = wptr1;
            outptr2 = outptr1;
            bias2 = bias1;
            if( i+1 >= outCn )
            {
                wptr2 = wptr1 = wptr0;
                outptr2 = outptr1 = outptr0;
                bias2 = bias1 = bias0;
            }
        }

        if( relu )
        {
            // Per-channel negative slopes, clamped the same way as the rows above.
            r0 = relu[i]; r1 = relu[i+1]; r2 = relu[i+2];
            if( i+2 >= outCn )
            {
                r2 = r1;
                if( i+1 >= outCn )
                    r2 = r1 = r0;
            }
            vr0 = _mm_set1_ps(r0);
            vr1 = _mm_set1_ps(r1);
            vr2 = _mm_set1_ps(r2);
        }

        int j = 0;
        // Main loop: 4 output positions x 3 output channels per iteration,
        // i.e. 12 independent accumulators.
        for( ; j <= blockSize - 4; j += 4 )
        {
            int k = 0;
            const float* rptr = rowbuf + j*vecsize_aligned;

            __m256 vs00 = _mm256_setzero_ps(), vs01 = _mm256_setzero_ps(),
                   vs02 = _mm256_setzero_ps(), vs03 = _mm256_setzero_ps(),
                   vs10 = _mm256_setzero_ps(), vs11 = _mm256_setzero_ps(),
                   vs12 = _mm256_setzero_ps(), vs13 = _mm256_setzero_ps(),
                   vs20 = _mm256_setzero_ps(), vs21 = _mm256_setzero_ps(),
                   vs22 = _mm256_setzero_ps(), vs23 = _mm256_setzero_ps();

#if CV_AVX512_SKX // AVX512VL is necessary to avoid register spilling
            if (vecsize >= 32)
            {
                // 16-float-wide accumulation for the bulk of the dot product.
                __m512 vs00_5 = _mm512_setzero_ps(), vs01_5 = _mm512_setzero_ps(),
                       vs02_5 = _mm512_setzero_ps(), vs03_5 = _mm512_setzero_ps(),
                       vs10_5 = _mm512_setzero_ps(), vs11_5 = _mm512_setzero_ps(),
                       vs12_5 = _mm512_setzero_ps(), vs13_5 = _mm512_setzero_ps(),
                       vs20_5 = _mm512_setzero_ps(), vs21_5 = _mm512_setzero_ps(),
                       vs22_5 = _mm512_setzero_ps(), vs23_5 = _mm512_setzero_ps();

                for (; k <= vecsize - 16; k += 16, rptr += 16)
                {
                    __m512 w0 = _mm512_loadu_ps(wptr0 + k);
                    __m512 w1 = _mm512_loadu_ps(wptr1 + k);
                    __m512 w2 = _mm512_loadu_ps(wptr2 + k);
                    __m512 r0 = _mm512_loadu_ps(rptr);

                    vs00_5 = _mm512_fmadd_ps(w0, r0, vs00_5);
                    vs10_5 = _mm512_fmadd_ps(w1, r0, vs10_5);
                    vs20_5 = _mm512_fmadd_ps(w2, r0, vs20_5);

                    r0 = _mm512_loadu_ps(rptr + vecsize_aligned);
                    vs01_5 = _mm512_fmadd_ps(w0, r0, vs01_5);
                    vs11_5 = _mm512_fmadd_ps(w1, r0, vs11_5);
                    vs21_5 = _mm512_fmadd_ps(w2, r0, vs21_5);

                    r0 = _mm512_loadu_ps(rptr + vecsize_aligned*2);
                    vs02_5 = _mm512_fmadd_ps(w0, r0, vs02_5);
                    vs12_5 = _mm512_fmadd_ps(w1, r0, vs12_5);
                    vs22_5 = _mm512_fmadd_ps(w2, r0, vs22_5);

                    r0 = _mm512_loadu_ps(rptr + vecsize_aligned*3);
                    vs03_5 = _mm512_fmadd_ps(w0, r0, vs03_5);
                    vs13_5 = _mm512_fmadd_ps(w1, r0, vs13_5);
                    vs23_5 = _mm512_fmadd_ps(w2, r0, vs23_5);
                }
                /*
                 * now fold the 512 bit accumulator vectors into 256 bit vectors so that the AVX2 code can finish
                 * the tail of the vector
                 */
                vs00 = _mm256_add_ps( _mm512_extractf32x8_ps(vs00_5, 0), _mm512_extractf32x8_ps(vs00_5, 1));
                vs10 = _mm256_add_ps( _mm512_extractf32x8_ps(vs10_5, 0), _mm512_extractf32x8_ps(vs10_5, 1));
                vs20 = _mm256_add_ps( _mm512_extractf32x8_ps(vs20_5, 0), _mm512_extractf32x8_ps(vs20_5, 1));
                vs01 = _mm256_add_ps( _mm512_extractf32x8_ps(vs01_5, 0), _mm512_extractf32x8_ps(vs01_5, 1));
                vs11 = _mm256_add_ps( _mm512_extractf32x8_ps(vs11_5, 0), _mm512_extractf32x8_ps(vs11_5, 1));
                vs21 = _mm256_add_ps( _mm512_extractf32x8_ps(vs21_5, 0), _mm512_extractf32x8_ps(vs21_5, 1));
                vs02 = _mm256_add_ps( _mm512_extractf32x8_ps(vs02_5, 0), _mm512_extractf32x8_ps(vs02_5, 1));
                vs12 = _mm256_add_ps( _mm512_extractf32x8_ps(vs12_5, 0), _mm512_extractf32x8_ps(vs12_5, 1));
                vs22 = _mm256_add_ps( _mm512_extractf32x8_ps(vs22_5, 0), _mm512_extractf32x8_ps(vs22_5, 1));
                vs03 = _mm256_add_ps( _mm512_extractf32x8_ps(vs03_5, 0), _mm512_extractf32x8_ps(vs03_5, 1));
                vs13 = _mm256_add_ps( _mm512_extractf32x8_ps(vs13_5, 0), _mm512_extractf32x8_ps(vs13_5, 1));
                vs23 = _mm256_add_ps( _mm512_extractf32x8_ps(vs23_5, 0), _mm512_extractf32x8_ps(vs23_5, 1));
            }
#endif
            // AVX2 tail (or the whole dot product when AVX-512 is unavailable),
            // 8 floats per step; uses aligned loads, so rowbuf/weights rows must
            // be 32-byte aligned with stride vecsize_aligned.
            for (; k < vecsize; k += 8, rptr += 8 )
            {
                __m256 w0 = _mm256_load_ps(wptr0 + k);
                __m256 w1 = _mm256_load_ps(wptr1 + k);
                __m256 w2 = _mm256_load_ps(wptr2 + k);
                __m256 r0 = _mm256_load_ps(rptr);

                vs00 = _mm256_fmadd_ps(w0, r0, vs00);
                vs10 = _mm256_fmadd_ps(w1, r0, vs10);
                vs20 = _mm256_fmadd_ps(w2, r0, vs20);

                r0 = _mm256_load_ps(rptr + vecsize_aligned);
                vs01 = _mm256_fmadd_ps(w0, r0, vs01);
                vs11 = _mm256_fmadd_ps(w1, r0, vs11);
                vs21 = _mm256_fmadd_ps(w2, r0, vs21);

                r0 = _mm256_load_ps(rptr + vecsize_aligned*2);
                vs02 = _mm256_fmadd_ps(w0, r0, vs02);
                vs12 = _mm256_fmadd_ps(w1, r0, vs12);
                vs22 = _mm256_fmadd_ps(w2, r0, vs22);

                r0 = _mm256_load_ps(rptr + vecsize_aligned*3);
                vs03 = _mm256_fmadd_ps(w0, r0, vs03);
                vs13 = _mm256_fmadd_ps(w1, r0, vs13);
                vs23 = _mm256_fmadd_ps(w2, r0, vs23);
            }

            // Horizontal reduction: after the hadd tree plus the cross-lane
            // permute/add, the low 128 bits of t0/t1/t2 hold the 4 finished dot
            // products (positions j..j+3) for each of the 3 output channels.
            __m256 t0 = _mm256_hadd_ps(_mm256_hadd_ps(vs00, vs01), _mm256_hadd_ps(vs02, vs03));
            __m256 t1 = _mm256_hadd_ps(_mm256_hadd_ps(vs10, vs11), _mm256_hadd_ps(vs12, vs13));
            __m256 t2 = _mm256_hadd_ps(_mm256_hadd_ps(vs20, vs21), _mm256_hadd_ps(vs22, vs23));

            t0 = _mm256_add_ps(t0, _mm256_permute2f128_ps(t0, t0, 1));
            t1 = _mm256_add_ps(t1, _mm256_permute2f128_ps(t1, t1, 1));
            t2 = _mm256_add_ps(t2, _mm256_permute2f128_ps(t2, t2, 1));

            __m128 s0, s1, s2;
            // Seed the result either with the bias (fresh output) or with the
            // previously stored partial sums (accumulation across K-blocks).
            if( initOutput )
            {
                s0 = _mm_set1_ps(bias0);
                s1 = _mm_set1_ps(bias1);
                s2 = _mm_set1_ps(bias2);
            }
            else
            {
                s0 = _mm_loadu_ps(outptr0 + j);
                s1 = _mm_loadu_ps(outptr1 + j);
                s2 = _mm_loadu_ps(outptr2 + j);
            }

            s0 = _mm_add_ps(s0, _mm256_castps256_ps128(t0));
            s1 = _mm_add_ps(s1, _mm256_castps256_ps128(t1));
            s2 = _mm_add_ps(s2, _mm256_castps256_ps128(t2));

            if( relu )
            {
                // Branchless leaky ReLU: where s <= 0, replace s with s*slope.
                // The xor/andnot trick blends s and s*vr using the comparison mask.
                __m128 m0 = _mm_cmp_ps(s0, z, _CMP_GT_OS);
                __m128 m1 = _mm_cmp_ps(s1, z, _CMP_GT_OS);
                __m128 m2 = _mm_cmp_ps(s2, z, _CMP_GT_OS);
                s0 = _mm_xor_ps(s0, _mm_andnot_ps(m0, _mm_xor_ps(_mm_mul_ps(s0, vr0), s0)));
                s1 = _mm_xor_ps(s1, _mm_andnot_ps(m1, _mm_xor_ps(_mm_mul_ps(s1, vr1), s1)));
                s2 = _mm_xor_ps(s2, _mm_andnot_ps(m2, _mm_xor_ps(_mm_mul_ps(s2, vr2), s2)));
            }

            _mm_storeu_ps(outptr0 + j, s0);
            _mm_storeu_ps(outptr1 + j, s1);
            _mm_storeu_ps(outptr2 + j, s2);
        }

        // Scalar tail for the remaining (< 4) output positions.
        for( ; j < blockSize; j++ )
        {
            const float* rptr = rowbuf + j*vecsize_aligned;
            float s00, s10, s20;

            if( initOutput )
            {
                s00 = bias0;
                s10 = bias1;
                s20 = bias2;
            }
            else
            {
                s00 = outptr0[j];
                s10 = outptr1[j];
                s20 = outptr2[j];
            }

            for( int k = 0; k < vecsize; k++ )
            {
                float r0 = rptr[k];
                s00 += wptr0[k]*r0;
                s10 += wptr1[k]*r0;
                s20 += wptr2[k]*r0;
            }

            if( relu )
            {
                s00 = s00 > 0.f ? s00 : s00*r0;
                s10 = s10 > 0.f ? s10 : s10*r1;
                s20 = s20 > 0.f ? s20 : s20*r2;
            }

            outptr0[j] = s00;
            outptr1[j] = s10;
            outptr2[j] = s20;
        }
    }
    // Clear the upper halves of the YMM registers to avoid AVX->SSE
    // transition penalties in subsequent non-VEX code.
    _mm256_zeroupper();
}
// dst = vec * weights^t + bias
// Matrix-vector product used by fully-connected layers: each of the nvecs
// rows of `weights` (row stride wstep, in floats) is dotted with `vec`
// (length vecsize, which the 8-wide aligned loads assume is padded to a
// multiple of 8) and bias[i] is added. Rows are processed eight at a time
// with AVX2 FMA, followed by a one-row-at-a-time tail loop.
void fastGEMM1T( const float* vec, const float* weights,
                 size_t wstep, const float* bias,
                 float* dst, int nvecs, int vecsize )
{
    int i = 0;

    // Main loop: 8 weight rows per iteration, one accumulator per row.
    for( ; i <= nvecs - 8; i += 8 )
    {
        const float* wptr = weights + i*wstep;
        __m256 vs0 = _mm256_setzero_ps(), vs1 = _mm256_setzero_ps(),
               vs2 = _mm256_setzero_ps(), vs3 = _mm256_setzero_ps(),
               vs4 = _mm256_setzero_ps(), vs5 = _mm256_setzero_ps(),
               vs6 = _mm256_setzero_ps(), vs7 = _mm256_setzero_ps();

        for( int k = 0; k < vecsize; k += 8, wptr += 8 )
        {
            __m256 v = _mm256_load_ps(vec + k);

            vs0 = _mm256_fmadd_ps(_mm256_load_ps(wptr), v, vs0);
            vs1 = _mm256_fmadd_ps(_mm256_load_ps(wptr + wstep), v, vs1);
            vs2 = _mm256_fmadd_ps(_mm256_load_ps(wptr + wstep*2), v, vs2);
            vs3 = _mm256_fmadd_ps(_mm256_load_ps(wptr + wstep*3), v, vs3);
            vs4 = _mm256_fmadd_ps(_mm256_load_ps(wptr + wstep*4), v, vs4);
            vs5 = _mm256_fmadd_ps(_mm256_load_ps(wptr + wstep*5), v, vs5);
            vs6 = _mm256_fmadd_ps(_mm256_load_ps(wptr + wstep*6), v, vs6);
            vs7 = _mm256_fmadd_ps(_mm256_load_ps(wptr + wstep*7), v, vs7);
        }

        // Horizontal reduction: after the hadd tree and the cross-lane add,
        // the low 128 bits of s0/s1 hold the 8 finished dot products.
        __m256 s0 = _mm256_hadd_ps(_mm256_hadd_ps(vs0, vs1), _mm256_hadd_ps(vs2, vs3));
        __m256 s1 = _mm256_hadd_ps(_mm256_hadd_ps(vs4, vs5), _mm256_hadd_ps(vs6, vs7));

        s0 = _mm256_add_ps(s0, _mm256_permute2f128_ps(s0, s0, 1));
        s1 = _mm256_add_ps(s1, _mm256_permute2f128_ps(s1, s1, 1));

        s0 = _mm256_add_ps(s0, _mm256_castps128_ps256(_mm_loadu_ps(bias + i)));
        s1 = _mm256_add_ps(s1, _mm256_castps128_ps256(_mm_loadu_ps(bias + i + 4)));

        _mm_storeu_ps(dst + i, _mm256_castps256_ps128(s0));
        _mm_storeu_ps(dst + i + 4, _mm256_castps256_ps128(s1));
    }

    // Tail: remaining rows processed one at a time (still 8-wide inside).
    float temp = 0.f;
    for( ; i < nvecs; i++ )
    {
        const float* wptr = weights + i*wstep;
        __m256 vs0 = _mm256_setzero_ps();

        for( int k = 0; k < vecsize; k += 8, wptr += 8 )
        {
            __m256 v = _mm256_load_ps(vec + k);
            vs0 = _mm256_fmadd_ps(_mm256_load_ps(wptr), v, vs0);
        }

        __m256 s0 = _mm256_hadd_ps(_mm256_hadd_ps(vs0, vs0), vs0);
        s0 = _mm256_add_ps(s0, _mm256_permute2f128_ps(s0, s0, 1));
        _mm_store_ss(&temp, _mm256_castps256_ps128(s0));
        dst[i] = temp + bias[i];
    }

    // Avoid AVX->SSE transition penalties in subsequent non-VEX code.
    _mm256_zeroupper();
}
// Dense GEMM: C(ma x nb) = A(ma x na) * B(na x nb), with row strides
// astep/bstep/cstep given in floats. Columns are processed 32 at a time with
// AVX-512 when available, then 16 at a time with AVX2, then a scalar tail.
// Rows are processed four at a time; for the last partial group the row
// pointers are clamped (std::min) onto the final row, so that row is simply
// recomputed a few times instead of needing a separate tail path.
void fastGEMM( const float* aptr, size_t astep, const float* bptr,
               size_t bstep, float* cptr, size_t cstep,
               int ma, int na, int nb )
{
    int n = 0;

#if CV_AVX512_SKX // AVX512VL is necessary to avoid register spilling
    // 32 columns x 4 rows per iteration (two 16-wide accumulators per row).
    for( ; n <= nb - 32; n += 32 )
    {
        for( int m = 0; m < ma; m += 4 )
        {
            const float* aptr0 = aptr + astep*m;
            const float* aptr1 = aptr + astep*std::min(m+1, ma-1);
            const float* aptr2 = aptr + astep*std::min(m+2, ma-1);
            const float* aptr3 = aptr + astep*std::min(m+3, ma-1);

            float* cptr0 = cptr + cstep*m;
            float* cptr1 = cptr + cstep*std::min(m+1, ma-1);
            float* cptr2 = cptr + cstep*std::min(m+2, ma-1);
            float* cptr3 = cptr + cstep*std::min(m+3, ma-1);

            __m512 d00 = _mm512_setzero_ps(), d01 = _mm512_setzero_ps();
            __m512 d10 = _mm512_setzero_ps(), d11 = _mm512_setzero_ps();
            __m512 d20 = _mm512_setzero_ps(), d21 = _mm512_setzero_ps();
            __m512 d30 = _mm512_setzero_ps(), d31 = _mm512_setzero_ps();

            for( int k = 0; k < na; k++ )
            {
                // Broadcast one A element per row, FMA against two B vectors.
                __m512 a0 = _mm512_set1_ps(aptr0[k]);
                __m512 a1 = _mm512_set1_ps(aptr1[k]);
                __m512 a2 = _mm512_set1_ps(aptr2[k]);
                __m512 a3 = _mm512_set1_ps(aptr3[k]);

                __m512 b0 = _mm512_loadu_ps(bptr + k*bstep + n);
                __m512 b1 = _mm512_loadu_ps(bptr + k*bstep + n + 16);

                d00 = _mm512_fmadd_ps(a0, b0, d00);
                d01 = _mm512_fmadd_ps(a0, b1, d01);
                d10 = _mm512_fmadd_ps(a1, b0, d10);
                d11 = _mm512_fmadd_ps(a1, b1, d11);
                d20 = _mm512_fmadd_ps(a2, b0, d20);
                d21 = _mm512_fmadd_ps(a2, b1, d21);
                d30 = _mm512_fmadd_ps(a3, b0, d30);
                d31 = _mm512_fmadd_ps(a3, b1, d31);
            }

            _mm512_storeu_ps(cptr0 + n, d00);
            _mm512_storeu_ps(cptr0 + n + 16, d01);
            _mm512_storeu_ps(cptr1 + n, d10);
            _mm512_storeu_ps(cptr1 + n + 16, d11);
            _mm512_storeu_ps(cptr2 + n, d20);
            _mm512_storeu_ps(cptr2 + n + 16, d21);
            _mm512_storeu_ps(cptr3 + n, d30);
            _mm512_storeu_ps(cptr3 + n + 16, d31);
        }
    }
#endif

    // 16 columns x 4 rows per iteration with AVX2.
    for( ; n <= nb - 16; n += 16 )
    {
        for( int m = 0; m < ma; m += 4 )
        {
            const float* aptr0 = aptr + astep*m;
            const float* aptr1 = aptr + astep*std::min(m+1, ma-1);
            const float* aptr2 = aptr + astep*std::min(m+2, ma-1);
            const float* aptr3 = aptr + astep*std::min(m+3, ma-1);

            float* cptr0 = cptr + cstep*m;
            float* cptr1 = cptr + cstep*std::min(m+1, ma-1);
            float* cptr2 = cptr + cstep*std::min(m+2, ma-1);
            float* cptr3 = cptr + cstep*std::min(m+3, ma-1);

            __m256 d00 = _mm256_setzero_ps(), d01 = _mm256_setzero_ps();
            __m256 d10 = _mm256_setzero_ps(), d11 = _mm256_setzero_ps();
            __m256 d20 = _mm256_setzero_ps(), d21 = _mm256_setzero_ps();
            __m256 d30 = _mm256_setzero_ps(), d31 = _mm256_setzero_ps();

            for( int k = 0; k < na; k++ )
            {
                __m256 a0 = _mm256_set1_ps(aptr0[k]);
                __m256 a1 = _mm256_set1_ps(aptr1[k]);
                __m256 a2 = _mm256_set1_ps(aptr2[k]);
                __m256 a3 = _mm256_set1_ps(aptr3[k]);

                __m256 b0 = _mm256_loadu_ps(bptr + k*bstep + n);
                __m256 b1 = _mm256_loadu_ps(bptr + k*bstep + n + 8);

                d00 = _mm256_fmadd_ps(a0, b0, d00);
                d01 = _mm256_fmadd_ps(a0, b1, d01);
                d10 = _mm256_fmadd_ps(a1, b0, d10);
                d11 = _mm256_fmadd_ps(a1, b1, d11);
                d20 = _mm256_fmadd_ps(a2, b0, d20);
                d21 = _mm256_fmadd_ps(a2, b1, d21);
                d30 = _mm256_fmadd_ps(a3, b0, d30);
                d31 = _mm256_fmadd_ps(a3, b1, d31);
            }

            _mm256_storeu_ps(cptr0 + n, d00);
            _mm256_storeu_ps(cptr0 + n + 8, d01);
            _mm256_storeu_ps(cptr1 + n, d10);
            _mm256_storeu_ps(cptr1 + n + 8, d11);
            _mm256_storeu_ps(cptr2 + n, d20);
            _mm256_storeu_ps(cptr2 + n + 8, d21);
            _mm256_storeu_ps(cptr3 + n, d30);
            _mm256_storeu_ps(cptr3 + n + 8, d31);
        }
    }

    // Scalar tail for the remaining (< 16) columns.
    for( ; n < nb; n++ )
    {
        for( int m = 0; m < ma; m++ )
        {
            const float* aptr0 = aptr + astep*m;
            float* cptr0 = cptr + cstep*m;
            float d0 = 0.f;

            for( int k = 0; k < na; k++ )
                d0 += aptr0[k]*bptr[k*bstep + n];

            cptr0[n] = d0;
        }
    }
    // Avoid AVX->SSE transition penalties in subsequent non-VEX code.
    _mm256_zeroupper();
}
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
CV_CPU_OPTIMIZATION_NAMESPACE_END
}} // namespace

View File

@@ -0,0 +1,522 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include "../op_vkcom.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/dnn/shape_utils.hpp"
#include "opencv2/core/hal/hal.hpp"
#include <algorithm>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
using namespace cv::dnn::ocl4dnn;
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/lrn.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Local Response Normalization (LRN) layer implementation.
// Supports two modes: normalization across channels (ACROSS_CHANNELS) and
// within a channel's spatial window (WITHIN_CHANNEL), computing
// dst = src / (bias + alpha/normFactor * sum(src^2))^beta
// over a window of `size` elements. Backends: OpenCV CPU (parallelized),
// OpenCL (ocl4dnn), CUDA, Halide, Vulkan, Inference Engine and nGraph.
class LRNLayerImpl CV_FINAL : public LRNLayer
{
public:
    // Reads norm_region/local_size/alpha/beta/bias/norm_by_size from the
    // layer parameters; rejects non-positive or even local_size values.
    LRNLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        type = -1;
        String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
        if (nrmType == "ACROSS_CHANNELS")
            type = CHANNEL_NRM;
        else if (nrmType == "WITHIN_CHANNEL")
            type = SPATIAL_NRM;
        else
            CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");

        size = params.get<int>("local_size", 5);
        if (size % 2 != 1 || size <= 0)
            CV_Error(Error::StsBadArg, "LRN layer supports only positive odd values for local_size");

        alpha = params.get<double>("alpha", 1);
        beta = params.get<double>("beta", 0.75);
        bias = params.get<double>("bias", 1);
        normBySize = params.get<bool>("norm_by_size", true);
    }

#ifdef HAVE_OPENCL
    Ptr<OCL4DNNLRN<float> > lrnOp;
#endif

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        // The IE backends require an integer-valued k (bias) parameter;
        // nGraph additionally supports only the across-channels mode.
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) {
            return bias == (int)bias;
        }
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
            return type == CHANNEL_NRM && bias == (int)bias;
        }
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               backendId == DNN_BACKEND_HALIDE ||
               (backendId == DNN_BACKEND_VKCOM && haveVulkan() && (size % 2 == 1) && (type == CHANNEL_NRM));
    }

#ifdef HAVE_OPENCL
    virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
    {
        // Drop the cached OpenCL primitive; it is rebuilt lazily on the next
        // forward_ocl() call with the (possibly changed) input geometry.
        lrnOp.release();
    }

    // OpenCL forward path via ocl4dnn; returns false to fall back to CPU.
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        bool use_half = (inps.depth() == CV_16S);
        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        if (lrnOp.empty())
        {
            OCL4DNNLRNConfig config;
            config.lrn_type = type == CHANNEL_NRM ?
                              LRNParameter_NormRegion_ACROSS_CHANNELS :
                              LRNParameter_NormRegion_WITHIN_CHANNEL;

            CHECK_EQ(size % 2, 1)<< "LRN only supports odd values for local_size";
            config.local_size = size;
            config.alpha = alpha;
            config.beta = beta;
            config.k = bias;
            CHECK_EQ(4, inputs[0].dims) << "Input must have 4 axes, "
                     << "corresponding to (num, channels, height, width)";
            config.batch_size = inputs[0].size[0];
            config.channels = inputs[0].size[1];
            config.height = inputs[0].size[2];
            config.width = inputs[0].size[3];
            config.norm_by_size = normBySize;
            config.use_half = use_half;

            lrnOp = Ptr<OCL4DNNLRN<float> >(new OCL4DNNLRN<float>(config));
        }

        if (!lrnOp->Forward(inputs[0], outputs[0]))
            return false;

        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_Assert(inputs_arr.total() == outputs_arr.total());

        // Try the OpenCL path first when the preferable target is an OCL one.
        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        // FP16 tensors go through the generic fallback, which converts to FP32.
        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        CV_Assert(inputs.size() == outputs.size());

        for (int i = 0; i < inputs.size(); i++)
        {
            CV_Assert(inputs[i].dims == 4);

            Mat &src = inputs[i];
            Mat &dst = outputs[i];

            switch (type)
            {
                case CHANNEL_NRM:
                    channelNormalization(src, dst);
                    break;
                case SPATIAL_NRM:
                    spatialNormalization(src, dst);
                    break;
                default:
                    CV_Error(Error::StsNotImplemented, "Unimplemented mode of LRN layer");
                    break;
            }
        }
    }

    // Parallel body computing across-channel LRN. The (N x H*W) pixel space is
    // split into stripes; for each pixel the running sum of squares over the
    // channel window is updated incrementally as the window slides.
    class ChannelLRN : public ParallelLoopBody
    {
    public:
        ChannelLRN(const float* src, float* dst, int channels, int ksize,
                   float alpha1, float bias1, float beta1,
                   size_t planeSize, int nsamples, int nstripes)
        {
            src_ = src; dst_ = dst;
            channels_ = channels;
            ksize_ = ksize;
            alpha1_ = alpha1; bias1_ = bias1; beta1_ = beta1;
            planeSize_ = planeSize; nsamples_ = nsamples; nstripes_ = nstripes;
        }

        void operator()(const Range& r) const CV_OVERRIDE
        {
            int nsamples = nsamples_, nstripes = nstripes_;
            size_t planeSize = planeSize_, planeSize_n = planeSize * nsamples;
            size_t elemsPerStripe = (planeSize_n + nstripes - 1)/nstripes;
            size_t rstart = r.start*elemsPerStripe;
            size_t rend = r.end == nstripes ? planeSize_n : r.end*elemsPerStripe;
            rstart = std::min(rstart, planeSize_n);
            rend = std::min(rend, planeSize_n);
            float alpha1 = alpha1_, bias1 = bias1_, beta1 = beta1_;
            int k, channels = channels_, ksize = ksize_;

            // buf holds one pixel's channel values with ksize+1 zero elements
            // of padding on each side, so the sliding window never reads out
            // of range; acc holds the per-channel normalization factors.
            AutoBuffer<float> buf_((channels + ksize + 1)*2);
            float* acc = buf_.data();
            float* buf = acc + channels + ksize + 1;
            for( k = 0; k <= ksize; k++ )
                buf[-k-1] = buf[channels + k] = 0.f;

            for( size_t ofs = rstart; ofs < rend; )
            {
                int sampleIdx = (int)(ofs/planeSize);
                if( sampleIdx >= nsamples )
                    break;
                size_t ofs0 = ofs - sampleIdx*planeSize;
                size_t ofs1 = std::min(planeSize - ofs0, rend - ofs) + ofs;
                const float* src = src_ + sampleIdx*planeSize*channels + ofs0;
                float* dst = dst_ + sampleIdx*planeSize*channels + ofs0;

                for( ; ofs < ofs1; ofs++, src++, dst++ )
                {
                    // Gather this pixel's values across all channels.
                    for( k = 0; k < channels; k++ )
                        buf[k] = src[k*planeSize];
                    // Initial sum of squares over the leading half-window.
                    float s = 0;
                    for( k = 0; k < ksize; k++ )
                        s += buf[k]*buf[k];
                    for( k = 0; k < channels; k++ )
                    {
                        // Slide the window by one channel:
                        // add x1^2 (entering) and subtract x0^2 (leaving),
                        // written as (x1+x0)*(x1-x0); clamp at 0 to absorb
                        // floating-point cancellation error.
                        float x1 = buf[k + ksize];
                        float x0 = buf[k - ksize - 1];
                        s = std::max(s + (x1 + x0)*(x1 - x0), 0.f);
                        acc[k] = (float)(alpha1*s + bias1);
                    }

                    // pow(acc, beta1) computed as exp(beta1 * log(acc));
                    // beta1 is passed in as -beta, so this yields the divisor's
                    // reciprocal and a multiply suffices below.
                    hal::log32f(acc, acc, channels);
                    for( k = 0; k < channels; k++ )
                        acc[k] *= beta1;
                    hal::exp32f(acc, acc, channels);

                    for( k = 0; k < channels; k++ )
                        dst[k*planeSize] = buf[k]*acc[k];
                }
            }
        }

        const float* src_;
        float* dst_;
        float alpha1_, bias1_, beta1_;
        size_t planeSize_;
        int channels_, ksize_, nsamples_, nstripes_;
    };

    void channelNormalization(Mat &srcBlob, Mat &dstBlob)
    {
        int num = srcBlob.size[0];
        int channels = srcBlob.size[1];
        int ksize = (size - 1) / 2;
        int sizeNormFactor = normBySize ? size : 1;
        size_t planeSize = srcBlob.size[2]*srcBlob.size[3];

        int nstripes = std::max(getNumThreads(), 1);

        // Note: beta is negated so the parallel body can multiply by
        // pow(..., -beta) instead of dividing.
        ChannelLRN clrn(srcBlob.ptr<float>(), dstBlob.ptr<float>(), channels,
                        ksize, alpha/sizeNormFactor, bias, -beta, planeSize, num, nstripes);
        parallel_for_(Range(0, nstripes), clrn, nstripes);
    }

    // Box filter of squared values over a size x size window, zero-padded at
    // the borders; building block of the within-channel mode.
    void sqrBoxFilter_(const Mat &src, Mat &dst)
    {
        Mat srcRawWrapper(src.rows, src.cols, src.type(), src.data, src.step[0]);
        cv::sqrBoxFilter(srcRawWrapper, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT);
    }

    void spatialNormalization(Mat &srcBlob, Mat &dstBlob)
    {
        int num = srcBlob.size[0];
        int channels = srcBlob.size[1];
        int sizeNormFactor = normBySize ? size*size : 1;

        Mat srcMat = srcBlob;
        Mat dstMat = dstBlob;

        // dst = src / (bias + alpha/normFactor * boxsum(src^2))^beta,
        // computed plane by plane.
        for (int n = 0; n < num; n++)
        {
            for (int cn = 0; cn < channels; cn++)
            {
                Mat src = getPlane(srcMat, n, cn);
                Mat dst = getPlane(dstMat, n, cn);

                sqrBoxFilter_(src, dst);

                dst.convertTo(dst, dst.type(), alpha/sizeNormFactor, bias);
                cv::pow(dst, beta, dst);
                cv::divide(src, dst, dst);
            }
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        cuda4dnn::LRNType type_;
        if (type == CHANNEL_NRM)
            type_ = cuda4dnn::LRNType::ACROSS_CHANNELS;
        else if (type == SPATIAL_NRM)
            type_ = cuda4dnn::LRNType::WITHIN_CHANNEL;
        else
            CV_Error(Error::StsNotImplemented, "Unknown normalization region");

        // cuDNN always normalizes by the window size, so when norm_by_size is
        // off the scaling is folded into alpha to compensate.
        float alphaSize = alpha;
        if (!normBySize) {
            switch (type) {
                case CHANNEL_NRM: alphaSize = alpha * size; break;
                case SPATIAL_NRM: alphaSize = alpha * size * size; break;
            }
        }

        // The CUDA primitive sizes its workspace for the largest input.
        std::size_t largestInputSize = 0;
        for(auto& wrapper : inputs) {
            auto input_wrapper = wrapper.dynamicCast<CUDABackendWrapper>();
            auto shape = input_wrapper->getShape();
            largestInputSize = std::max<std::size_t>(
                largestInputSize,
                std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<int>())
            );
        }

        return make_cuda_node<cuda4dnn::LRNOp>(preferableTarget,
            std::move(context->cudnn_handle), type_, size, alphaSize, beta, bias, largestInputSize);
    }
#endif

    virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
    {
#ifdef HAVE_VULKAN
        std::shared_ptr<vkcom::OpBase> op(new vkcom::OpLRN(size / 2, bias, alpha, beta, normBySize));
        return Ptr<BackendNode>(new VkComBackendNode(inputs, op));
#endif
        return Ptr<BackendNode>();
    }

    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        // Halide formulation: square the input, zero-pad the borders, sum
        // over the window with an RDom, then divide by pow(base, beta).
        float alphaSize = alpha;
        if (normBySize)
            alphaSize /= (type == CHANNEL_NRM ? size : size * size);
        int width, height, channels, numImgs;
        Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
        getCanonicalSize(inputBuffer, &width, &height, &channels, &numImgs);

        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        Halide::Func padded_sq(name + "_padded_sq");
        Halide::Func sq("sq");
        sq(x, y, c, n) = inputBuffer(x, y, c, n) * inputBuffer(x, y, c, n);

        Halide::Func bounded =
            Halide::BoundaryConditions::constant_exterior(sq, 0, 0, width,
                                                          0, height,
                                                          0, channels,
                                                          0, numImgs);
        padded_sq(x, y, c, n) = bounded(x, y, c, n);

        Halide::Expr base;
        if (type == CHANNEL_NRM)
        {
            Halide::RDom r((1 - size) / 2, size);
            base = alphaSize * sum(padded_sq(x, y, c + r, n));
        }
        else // SPATIAL_NRM
        {
            Halide::RDom r((1 - size) / 2, size, (1 - size) / 2, size);
            base = alphaSize * sum(padded_sq(x + r.x, y + r.y, c, n));
        }
        base += static_cast<float>(bias);
        top(x, y, c, n) = inputBuffer(x, y, c, n) / pow(base, beta);
        return Ptr<BackendNode>(new HalideBackendNode({ padded_sq, top }));
#endif  // HAVE_HALIDE
        return Ptr<BackendNode>();
    }

    virtual void applyHalideScheduler(Ptr<BackendNode>& node,
                                      const std::vector<Mat*> &inputs,
                                      const std::vector<Mat> &outputs,
                                      int targetId) const CV_OVERRIDE
    {
#ifdef  HAVE_HALIDE
        if (targetId != DNN_TARGET_CPU)
        {
            Layer::applyHalideScheduler(node, inputs, outputs, targetId);
            return;
        }
        int outW, outH, outC, outN;
        getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);

        Halide::Var x("x"), y("y"), c("c"), n("n"), yo("yo"), yi("yi"), tile("tile");
        Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs[1];
        Halide::Func& padded_sq = node.dynamicCast<HalideBackendNode>()->funcs[0];

        // Tiny outputs are not worth scheduling; default order is fine.
        if (outW < 8 || outH <= 2)
            return;

        top.reorder(x, c, y, n)
           .split(y, yo, yi, 2)
           .fuse(yo, n, tile)
           .parallel(tile)
           .unroll(yi)
           .vectorize(x, 8);
        padded_sq.store_at(top, tile)
                 .compute_at(top, yi);
#endif  // HAVE_HALIDE
    }

#ifdef HAVE_INF_ENGINE
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        // IE normalizes by size internally; fold the factor into alpha when
        // norm_by_size is disabled (mirrors the CUDA path above).
        float alphaSize = alpha;
        if (!normBySize)
            alphaSize *= (type == SPATIAL_NRM ? size*size : size);

        InferenceEngine::Builder::NormLayer ieLayer(name);
        ieLayer.setSize(size);
        ieLayer.setAlpha(alphaSize);
        ieLayer.setBeta(beta);
        ieLayer.setAcrossMaps(type == CHANNEL_NRM);

        InferenceEngine::Builder::Layer l = ieLayer;
        l.getParameters()["k"] = bias;
        return Ptr<BackendNode>(new InfEngineBackendNode(l));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        float alphaSize = alpha;
        if (!normBySize)
            alphaSize *= (type == SPATIAL_NRM ? size*size : size);

        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        auto lrn = std::make_shared<ngraph::op::LRN>(ieInpNode, (double)alphaSize, (double)beta, (double)bias, (size_t)size);
        return Ptr<BackendNode>(new InfEngineNgraphNode(lrn));
    }
#endif  // HAVE_DNN_NGRAPH

    // Rough per-input FLOP estimate for profiling/scheduling purposes.
    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
        CV_UNUSED(outputs); // suppress unused variable warning
        CV_Assert(inputs.size() > 0);
        long flops = 0;

        for(int i = 0; i < inputs.size(); i++)
        {
            if (type == CHANNEL_NRM)
            {
                int channels = inputs[i][1];
                int ksize = (size - 1) / 2;

                flops += inputs[i][0]*(std::min(ksize, channels)*2*total(inputs[i], 2) + channels*4*total(inputs[i], 2));

                if (ksize < channels)
                {
                    flops += (size + 2*(channels - size))*total(inputs[i], 2);
                }
            }
            else
            {
                flops += total(inputs[i])*(2*size*size + 2);
            }
        }
        return flops;
    }

private:
    enum Type
    {
        CHANNEL_NRM,   // normalize across channels
        SPATIAL_NRM    // normalize within a channel's spatial window
    };
};
/** Factory method: constructs the LRN layer implementation from layer parameters. */
Ptr<LRNLayer> LRNLayer::create(const LayerParams& params)
{
    Ptr<LRNLayer> layer = makePtr<LRNLayerImpl>(params);
    return layer;
}
}
}

View File

@@ -0,0 +1,196 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Implementation of Batch Normalization layer.
*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/max_unpooling.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Max-unpooling layer: scatters the pooled values back into a larger output
// tensor at the positions recorded by the matching max-pooling layer
// (inputs: pooled data, pooled indices, and optionally a shape reference).
// All non-maximum output positions are set to zero.
class MaxUnpoolLayerImpl CV_FINAL : public MaxUnpoolLayer
{
public:
    // Reads the originating pooling geometry (kernel/pad/stride) from the
    // layer parameters; all six values are required.
    MaxUnpoolLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        poolKernel = Size(params.get<int>("pool_k_w"), params.get<int>("pool_k_h"));
        poolPad = Size(params.get<int>("pool_pad_w"), params.get<int>("pool_pad_h"));
        poolStride = Size(params.get<int>("pool_stride_w"), params.get<int>("pool_stride_h"));
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        // Halide path cannot express padded unpooling, hence the pad check.
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide() && !poolPad.width && !poolPad.height);
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == 2 || inputs.size() == 3);
        CV_Assert(total(inputs[0]) == total(inputs[1]));

        MatShape outShape;
        if (inputs.size() == 2)
        {
            // Invert the pooling output-size formula to recover the
            // pre-pooling spatial dimensions.
            outShape = inputs[0];
            outShape[2] = (outShape[2] - 1) * poolStride.height + poolKernel.height - 2 * poolPad.height;
            outShape[3] = (outShape[3] - 1) * poolStride.width + poolKernel.width - 2 * poolPad.width;
        }
        else
            // Third input provides the exact target shape.
            outShape = inputs[2];

        outputs.clear();
        outputs.push_back(outShape);

        return false;
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        // FP16 tensors go through the generic fallback (converted to FP32).
        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        CV_Assert(inputs.size() == 2 || inputs.size() == 3);
        Mat& input = inputs[0];
        Mat& indices = inputs[1];

        CV_Assert(input.total() == indices.total());
        CV_Assert(input.size[0] == 1);  // CPU path supports batch size 1 only
        CV_Assert(input.isContinuous());

        for(int i_n = 0; i_n < outputs.size(); i_n++)
        {
            Mat& outBlob = outputs[i_n];
            // Zero everything first; only max positions receive values.
            outBlob.setTo(0);
            CV_Assert(input.size[1] == outBlob.size[1]);
            int outPlaneTotal = outBlob.size[2]*outBlob.size[3];

            for (int i_c = 0; i_c < input.size[1]; i_c++)
            {
                Mat outPlane = getPlane(outBlob, 0, i_c);
                int wh_area = input.size[2]*input.size[3];
                const float* inptr = input.ptr<float>(0, i_c);
                // Indices are stored as floats (flat offsets within a plane)
                // and truncated to int below.
                const float* idxptr = indices.ptr<float>(0, i_c);
                float* outptr = outPlane.ptr<float>();

                for(int i_wh = 0; i_wh < wh_area; i_wh++)
                {
                    int index = idxptr[i_wh];
                    if (!(0 <= index && index < outPlaneTotal))
                    {
                        // Dump full diagnostics before failing: out-of-range
                        // indices here usually indicate a mismatched
                        // pooling/unpooling pair in the imported model.
                        std::cerr
                            << "i_n=" << i_n << std::endl
                            << "i_c=" << i_c << std::endl
                            << "i_wh=" << i_wh << std::endl
                            << "index=" << index << std::endl
                            << "maxval=" << inptr[i_wh] << std::endl
                            << "outPlaneTotal=" << outPlaneTotal << std::endl
                            << "input.size=" << input.size << std::endl
                            << "indices.size=" << indices.size << std::endl
                            << "outBlob=" << outBlob.size << std::endl
                            ;
                        CV_Assert(0 <= index && index < outPlaneTotal);
                    }
                    outptr[index] = inptr[i_wh];
                }
            }
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        cuda4dnn::MaxUnpoolingConfiguration config;
        auto& window_size = config.window_size;
        window_size.resize(2);
        window_size[0] = poolKernel.height;
        window_size[1] = poolKernel.width;

        auto& strides = config.strides;
        strides.resize(2);
        strides[0] = poolStride.height;
        strides[1] = poolStride.width;

        auto& pads_begin = config.pads_begin;
        pads_begin.resize(2);
        pads_begin[0] = poolPad.height;
        pads_begin[1] = poolPad.width;

        return make_cuda_node<cuda4dnn::MaxUnpoolingOp>(preferableTarget, std::move(context->stream), config);
    }
#endif

    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        // Meaningless operation if false because if kernel > stride
        // it is not deterministic and if kernel < stride we just
        // skip a part of input data (you'd better change your model).
        if (poolKernel.width != poolStride.width ||
            poolKernel.height != poolStride.height)
            CV_Error(cv::Error::StsNotImplemented,
                     "Halide backend for maximum unpooling "
                     "is not support cases when kernel != stride");

        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        Halide::Buffer<float> inputBuffer = halideBuffer(input[0]);
        Halide::Buffer<float> indices = halideBuffer(input[1]);

        // Each output pixel checks whether its flat index matches the stored
        // argmax of its pooling cell; if so it takes the pooled value, else 0.
        Halide::Expr pooledX = x / poolKernel.width;
        Halide::Expr pooledY = y / poolKernel.height;

        const int outW = inputBuffer.width() * poolKernel.width;
        top(x, y, c, n) = select(y * outW + x == indices(pooledX, pooledY, c, n),
                                 inputBuffer(pooledX, pooledY, c, n), 0.0f);
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif  // HAVE_HALIDE
        return Ptr<BackendNode>();
    }
};
/** Factory method: constructs the max-unpooling layer implementation from layer parameters. */
Ptr<MaxUnpoolLayer> MaxUnpoolLayer::create(const LayerParams& params)
{
    Ptr<MaxUnpoolLayer> layer = makePtr<MaxUnpoolLayerImpl>(params);
    return layer;
}
}
}

View File

@@ -0,0 +1,416 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_OPENCL
#include "../ocl4dnn/include/math_functions.hpp"
#include "opencl_kernels_dnn.hpp"
#endif
namespace cv
{
namespace dnn
{
// Mean-Variance Normalization (MVN) layer.
// The input is logically reshaped to rows by flattening leading dimensions
// (dims [0, 1) when acrossChannels, else [0, 2)); every row then gets its
// mean subtracted and, when normVariance is set, is divided by
// sqrt(variance + eps). A following scale/shift layer (batch-norm style)
// and a ReLU may be fused into this layer via setActivation().
class MVNLayerImpl CV_FINAL : public MVNLayer
{
public:
    MVNLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        normVariance = params.get<bool>("normalize_variance", true);
        acrossChannels = params.get<bool>("across_channels", false);
        eps = params.get<double>("eps", 1e-9);
        // Fusion flags start cleared; they are set later by setActivation().
        fuse_batch_norm = false;
        fuse_relu = false;
        relu_slope = 0.f;
        zeroDev = false;
    }

    // Per-channel scale/shift taken from a fused scale/batch-norm layer.
    Mat scale, shift;
#ifdef HAVE_OPENCL
    // Device-side copies of scale/shift, uploaded lazily in forward_ocl.
    UMat umat_scale, umat_shift;
#endif
    bool fuse_batch_norm;

    Ptr<ReLULayer> activ_relu;
    float relu_slope;
    bool fuse_relu;
    bool zeroDev; // TODO: Doesn't considered in Intel's Inference Engine backend.

    // Attempts to fuse the next layer: first a scale/shift producer
    // (any layer reporting non-empty getScaleShift), then — only on the
    // OpenCL target — a ReLU. Returns true if the layer was fused.
    bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
    {
        if (!layer.empty() && !fuse_relu && !fuse_batch_norm)
        {
            layer->getScaleShift(scale, shift);
            fuse_batch_norm = !scale.empty() || !shift.empty();
            return fuse_batch_norm;
        }
        if (!layer.empty() && preferableTarget == DNN_TARGET_OPENCL)
        {
            activ_relu = layer.dynamicCast<ReLULayer>();
            if( !activ_relu.empty() )
                relu_slope = activ_relu->negativeSlope;
        }
        fuse_relu = !activ_relu.empty();
        return fuse_relu;
    }

    // Detects the degenerate case where every "row" holds a single value
    // (standard deviation would be zero) and drops stale OpenCL buffers.
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);
        int splitDim = (acrossChannels) ? 1 : 2;
        int i, newRows = 1;
        for( i = 0; i < splitDim; i++ )
            newRows *= inputs[0].size[i];
        zeroDev = inputs[0].total() == newRows;
#ifdef HAVE_OPENCL
        umat_scale.release();
        umat_shift.release();
#endif
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
#ifdef HAVE_INF_ENGINE
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
            // zeroDev rows and large eps on MYRIAD are not handled by IE.
            return !zeroDev && (preferableTarget != DNN_TARGET_MYRIAD || eps <= 1e-7f);
        else
#endif  // HAVE_INF_ENGINE
            return backendId == DNN_BACKEND_OPENCV;
    }

#ifdef HAVE_OPENCL
    // Vectorized OpenCL path (two fused kernels) used when normVariance is
    // set and both the row count and the row length are multiples of 4.
    bool fast_forward_ocl(std::vector<UMat> &inputs, std::vector<UMat> &outputs)
    {
        // Lazily upload the fused scale/shift to the device.
        if (umat_scale.empty() && !scale.empty())
            scale.copyTo(umat_scale);
        if (umat_shift.empty() && !shift.empty())
            shift.copyTo(umat_shift);
        UMat& bnorm_weight = umat_scale;
        UMat& bnorm_bias = umat_shift;

        const unsigned LOCAL_SIZE = 128;
        bool use_half = (inputs[0].depth() == CV_16S);
        String opts = format(" -DT=%s -DT4=%s -Dconvert_T=%s -DLOCAL_SIZE=%u", use_half ? "half" : "float",
                             use_half ? "half4" : "float4", use_half ? "convert_half4" : "convert_float4",
                             LOCAL_SIZE
                             );

        int splitDim = (acrossChannels) ? 1 : 2;
        for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
        {
            UMat &inpMat = inputs[inpIdx];
            UMat &outMat = outputs[inpIdx];
            int newRows = total(shape(inpMat), 0, splitDim);
            CV_Assert(newRows != 0);

            MatShape s = shape(newRows, inpMat.total() / newRows);
            UMat meanMat = UMat(s[0], 1, (use_half) ? CV_16S : CV_32F);
            UMat tmpMat = UMat(s[0], s[1], CV_32F);
            float alpha = 1.0f / s[1];

            // Kernel 1: per-row mean (4 rows per work-group).
            String buildopt = "-DNUM=4" + opts;
            ocl::Kernel k("mean_fuse4", ocl::dnn::mvn_oclsrc, buildopt + " -DKERNEL_MEAN_FUSE");
            size_t localsize[] = { LOCAL_SIZE };
            size_t globalsize[] = { (size_t)s[0] / 4 * localsize[0] };

            int argId = 0;
            k.set(argId++, ocl::KernelArg::PtrReadOnly(inpMat));
            k.set(argId++, (int)s[1]);
            k.set(argId++, alpha);
            k.set(argId++, ocl::KernelArg::PtrWriteOnly(meanMat));
            k.set(argId++, ocl::KernelArg::PtrWriteOnly(tmpMat));
            bool ret = k.run(1, globalsize, localsize, false);
            if (!ret)
                return false;

            // Kernel 2: normalization fused with optional scale/shift + ReLU.
            buildopt += format(" %s %s", (fuse_batch_norm) ? "-DFUSE_BATCH_NORM" : "",
                               (fuse_relu) ? "-DFUSE_RELU" : "");
            ocl::Kernel k1("mvn_fuse4", ocl::dnn::mvn_oclsrc, buildopt + " -DKERNEL_MVN_FUSE");
            argId = 0;
            k1.set(argId++, ocl::KernelArg::PtrReadOnly(tmpMat));
            k1.set(argId++, ocl::KernelArg::PtrReadOnly(inpMat));
            k1.set(argId++, ocl::KernelArg::PtrReadOnly(meanMat));
            k1.set(argId++, (int)s[1]);
            k1.set(argId++, (float)alpha);
            k1.set(argId++, (float)eps);
            k1.set(argId++, (float)relu_slope);
            k1.set(argId++, ocl::KernelArg::PtrReadOnly(bnorm_weight));
            k1.set(argId++, ocl::KernelArg::PtrReadOnly(bnorm_bias));
            k1.set(argId++, ocl::KernelArg::PtrWriteOnly(outMat));
            ret = k1.run(1, globalsize, localsize, false);
            if (!ret)
                return false;
        }
        return true;
    }

    // General OpenCL path: per-row mean (and variance) via GEMV against a
    // ones-vector, then one normalization kernel. FP32 only.
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        if (umat_scale.empty() && !scale.empty())
            scale.copyTo(umat_scale);
        if (umat_shift.empty() && !shift.empty())
            shift.copyTo(umat_shift);
        UMat& bnorm_weight = umat_scale;
        UMat& bnorm_bias = umat_shift;

        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);

        int splitDim = (acrossChannels) ? 1 : 2;
        int row_size = total(shape(inputs[0]), 0, splitDim);
        int plane_size = total(shape(inputs[0]), splitDim);
        // Prefer the fully fused fast path when the shape allows it.
        if (normVariance && (row_size % 4 == 0) && (plane_size % 4 == 0))
            return fast_forward_ocl(inputs, outputs);

        // FP16 inputs are only handled by the fast path above.
        if (inputs[0].depth() == CV_16S)
            return false;

        String opts = format(" -DT=float -DT4=float4 -Dconvert_T=convert_float4");

        for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
        {
            UMat &inpMat = inputs[inpIdx];
            UMat &outMat = outputs[inpIdx];
            int newRows = total(shape(inpMat), 0, splitDim);
            CV_Assert(newRows != 0);

            MatShape s = shape(newRows, inpMat.total() / newRows);
            UMat oneMat = UMat::ones(s[1], 1, CV_32F);
            UMat meanMat = UMat(s[0], 1, CV_32F);
            UMat devMat = UMat(s[0], 1, CV_32F);
            UMat tmpMat = UMat(s[0], s[1], CV_32F);
            float alpha = 1.0f / s[1];

            // Row means: (1/N) * inpMat * ones.
            bool ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, s[0], s[1], alpha,
                                                   inpMat, 0, oneMat, 0, 0.0f, meanMat, 0);
            if (!ret)
                return false;

            // Vector width chosen from the row length's divisibility.
            int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 4 : 1);
            size_t global[] = { (size_t)s[0], (size_t)(s[1] / number) };
            String buildopt = format("-DNUM=%d", number) + opts;
            if (normVariance)
            {
                // tmpMat <- squared deviations, then row-reduce to devMat.
                String kname = format("calc_mean%d", number);
                ocl::Kernel kernel(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt + " -DKERNEL_MEAN");
                if (kernel.empty())
                    return false;

                kernel.set(0, ocl::KernelArg::PtrReadOnly(inpMat));
                kernel.set(1, (int)s[0]);
                kernel.set(2, (int)s[1]);
                kernel.set(3, ocl::KernelArg::PtrReadOnly(meanMat));
                kernel.set(4, ocl::KernelArg::PtrWriteOnly(tmpMat));
                ret = kernel.run(2, global, NULL, false);
                if (!ret)
                    return false;

                ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, s[0], s[1], alpha,
                                                  tmpMat, 0, oneMat, 0, 0.0f, devMat, 0);
                if (!ret)
                    return false;
            }

            // Final normalization (optionally fused with scale/shift + ReLU).
            String kname = format("mvn%d", number);
            buildopt += format("%s%s%s -DKERNEL_MVN", (normVariance) ? " -DNORM_VARIANCE" : "",
                               (fuse_batch_norm) ? " -DFUSE_BATCH_NORM" : "",
                               (fuse_relu) ? " -DFUSE_RELU" : "");
            ocl::Kernel kernel1(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
            if (kernel1.empty())
                return false;
            kernel1.set(0, ocl::KernelArg::PtrReadOnly(inpMat));
            kernel1.set(1, (int)s[0]);
            kernel1.set(2, (int)s[1]);
            kernel1.set(3, (float)eps);
            kernel1.set(4, ocl::KernelArg::PtrReadOnly(meanMat));
            kernel1.set(5, ocl::KernelArg::PtrReadOnly(devMat));
            kernel1.set(6, ocl::KernelArg::PtrReadOnly(bnorm_weight));
            kernel1.set(7, ocl::KernelArg::PtrReadOnly(bnorm_bias));
            kernel1.set(8, (int)inpMat.size[1]);
            kernel1.set(9, (float)relu_slope);
            kernel1.set(10, ocl::KernelArg::PtrWriteOnly(outMat));
            ret = kernel1.run(2, global, NULL, false);
            if (!ret)
                return false;
        }
        return true;
    }
#endif

    // CPU forward (with OpenCL dispatch). FP16 inputs fall back to the
    // generic convert-run-convert path.
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs, internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
        {
            Mat &inpBlob = inputs[inpIdx];
            Mat &outBlob = outputs[inpIdx];

            int splitDim = (acrossChannels) ? 1 : 2;
            int i, newRows = 1;
            for( i = 0; i < splitDim; i++ )
                newRows *= inpBlob.size[i];
            Mat inpMat = inpBlob.reshape(1, newRows);
            Mat outMat = outBlob.reshape(1, newRows);

            if ( inpBlob.total() == newRows )
            {
                // MVN is applied to single values at an every row.
                if (shift.empty())
                {
                    outBlob.setTo(0);
                }
                else
                {
                    for ( i = 0; i < newRows; i++ )
                    {
                        outMat.row(i).setTo(((float*)shift.data)[i]);
                    }
                }
                // NOTE(review): this returns from forward() and skips any
                // remaining inputs; 'continue' looks intended — confirm.
                return;
            }

            Scalar mean, dev;
            for ( i = 0; i < newRows; i++)
            {
                Mat inpRow = inpMat.row(i);
                Mat outRow = outMat.row(i);
                // Fused per-row scale/shift (identity when out of range).
                float weight = 1.f;
                float bias = 0.f;
                if (fuse_batch_norm)
                {
                    weight = i < scale.cols ? ((float*)scale.data)[i] : weight;
                    bias = i < shift.cols ? ((float*)shift.data)[i] : bias;
                }
                cv::meanStdDev(inpRow, mean, (normVariance) ? dev : noArray());
                double alpha = 1;
                if (normVariance)
                {
                    alpha = 1 / std::sqrt(eps + dev[0]*dev[0]);
                }
                // Fold mean subtraction, normalization and the fused affine
                // transform into a single convertTo (y = a*x + b).
                double normalizationScale = 1.0;
                double normalizationShift = 0.0;
                if (fuse_batch_norm)
                {
                    normalizationScale = alpha * weight;
                    normalizationShift = -mean[0] * normalizationScale + bias;
                }
                else
                {
                    normalizationScale = alpha;
                    normalizationShift = -mean[0] * alpha;
                }
                inpRow.convertTo(outRow, outRow.type(), normalizationScale, normalizationShift);
            }
        }
    }

#ifdef HAVE_INF_ENGINE
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        InferenceEngine::Builder::MVNLayer ieLayer(name);
        ieLayer.setAcrossChannels(acrossChannels);
        ieLayer.setNormalize(normVariance);
        ieLayer.setEpsilon(eps);
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        auto mvn = std::make_shared<ngraph::op::MVN>(ieInpNode, acrossChannels, normVariance, eps);
        return Ptr<BackendNode>(new InfEngineNgraphNode(mvn));
    }
#endif  // HAVE_DNN_NGRAPH

    // Rough cost model: ~6 ops per element plus 3 per reduced row element.
    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
        CV_UNUSED(outputs); // suppress unused variable warning
        long flops = 0;
        for(int i = 0; i < inputs.size(); i++)
        {
            flops += 6*total(inputs[i]) + 3*total(inputs[i], 0, normVariance ? 2 : 1);
        }
        return flops;
    }
};
// Factory: builds the concrete implementation and hands ownership to a
// reference-counted cv::Ptr.
Ptr<MVNLayer> MVNLayer::create(const LayerParams& params)
{
    Ptr<MVNLayer> layer(new MVNLayerImpl(params));
    return layer;
}
}
}

View File

@@ -0,0 +1,396 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/normalize_bbox.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv { namespace dnn {
// Lp-normalization layer (SSD's "Normalize" / NormalizeBBox).
// Divides the input by its Lp norm computed over the axis range
// [startAxis, endAxis], optionally multiplying the result by learned
// per-channel (or single shared) scale weights stored in blobs[0].
class NormalizeBBoxLayerImpl CV_FINAL : public NormalizeBBoxLayer
{
public:
    NormalizeBBoxLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        pnorm = params.get<float>("p", 2);
        epsilon = params.get<float>("eps", 1e-10f);
        acrossSpatial = params.get<bool>("across_spatial", true);
        startAxis = params.get<int>("start_axis", 1);
        // "across_spatial" and an explicit "end_axis" are mutually exclusive.
        CV_Assert(!params.has("across_spatial") || !params.has("end_axis"));
        endAxis = params.get<int>("end_axis", acrossSpatial ? -1 : startAxis);
        CV_Assert(pnorm > 0);
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
        {
            // IE backends only implement the L2 case.
            if (pnorm != 2)
                return false;

            return preferableTarget == DNN_TARGET_MYRIAD ? !acrossSpatial : startAxis == 1;
        }
        return backendId == DNN_BACKEND_OPENCV ||
               (backendId == DNN_BACKEND_CUDA && (pnorm == 1 || pnorm == 2));
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == 1);
        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
        // One scratch buffer shaped like a single sample of the input.
        internals.resize(1, inputs[0]);
        internals[0][0] = 1;  // Batch size.
        return true;
    }

    // Resolves the -1 axis sentinels against the actual input rank and
    // recomputes acrossSpatial accordingly.
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);
        CV_Assert(inputs.size() == 1);
        endAxis = endAxis == -1 ? (inputs[0].dims - 1) : endAxis;
        startAxis = startAxis == -1 ? (inputs[0].dims - 1) : startAxis;
        acrossSpatial = (startAxis == 1 && endAxis == inputs[0].dims - 1);
    }

#ifdef HAVE_OPENCL
    // OpenCL path built from cv:: array ops on UMats (no custom kernels).
    // NOTE(review): `norm` and `scale` below are CPU Mats mixed into the
    // UMat pipeline — presumably relying on transparent UMat/Mat interop;
    // confirm this is intentional.
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        if (inputs_.depth() == CV_16S)
            return false;

        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);
        internals_.getUMatVector(internals);

        CV_Assert(inputs.size() == 1 && outputs.size() == 1);
        CV_Assert(inputs[0].total() == outputs[0].total());

        const UMat& inp0 = inputs[0];
        UMat& buffer = internals[0];
        startAxis = clamp(startAxis, inp0.dims);
        endAxis = clamp(endAxis, inp0.dims);

        // Decompose the blob as num x numPlanes x planeSize around the
        // normalized axis range.
        size_t num = total(shape(inp0.size), 0, startAxis);
        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
        size_t planeSize = inp0.total() / (num * numPlanes);
        MatShape s = shape(1, inputs[0].total());
        UMat inp = inputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
        UMat out = outputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
        for (size_t i = 0; i < num; ++i)
        {
            s = shape(numPlanes, planeSize);
            UMat src = inp.row(i).reshape(1, s.size(), &s[0]);
            UMat dst = out.row(i).reshape(1, s.size(), &s[0]);

            UMat abs_mat;
            absdiff(src, cv::Scalar::all(0), abs_mat);
            pow(abs_mat, pnorm, buffer);

            if (planeSize == 1)
            {
                // add eps to avoid overflow
                float absSum = sum(buffer)[0] + epsilon;
                float norm = pow(absSum, 1.0f / pnorm);
                multiply(src, 1.0f / norm, dst);
            }
            else
            {
                Mat norm;
                reduce(buffer, norm, 0, REDUCE_SUM);
                norm += epsilon;

                // compute inverted norm to call multiply instead divide
                cv::pow(norm, -1.0f / pnorm, norm);

                repeat(norm, numPlanes, 1, buffer);
                multiply(src, buffer, dst);
            }

            if (!blobs.empty())
            {
                // scale the output
                Mat scale = blobs[0];
                if (scale.total() == 1)
                {
                    // _scale: 1 x 1
                    multiply(dst, scale.at<float>(0, 0), dst);
                }
                else
                {
                    // _scale: _channels x 1
                    CV_Assert(scale.total() == numPlanes);
                    repeat(scale, 1, dst.cols, buffer);
                    multiply(dst, buffer, dst);
                }
            }
        }
        return true;
    }
#endif

    // CPU forward: same decomposition as the OCL path, iterating samples
    // via raw float pointers advanced by numPlanes*planeSize.
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs, internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        CV_Assert(inputs.size() == 1 && outputs.size() == 1);
        CV_Assert(inputs[0].total() == outputs[0].total());

        const Mat& inp0 = inputs[0];
        Mat& buffer = internals[0];
        startAxis = clamp(startAxis, inp0.dims);
        endAxis = clamp(endAxis, inp0.dims);

        const float* inpData = inp0.ptr<float>();
        float* outData = outputs[0].ptr<float>();

        size_t num = total(shape(inp0.size), 0, startAxis);
        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
        CV_Assert(num * numPlanes != 0);
        size_t planeSize = inp0.total() / (num * numPlanes);
        for (size_t n = 0; n < num; ++n)
        {
            // Views over the current sample (no copies).
            Mat src = Mat(numPlanes, planeSize, CV_32F, (void*)inpData);
            Mat dst = Mat(numPlanes, planeSize, CV_32F, (void*)outData);
            cv::pow(abs(src), pnorm, buffer);

            if (planeSize == 1)
            {
                // add eps to avoid overflow
                float absSum = sum(buffer)[0] + epsilon;
                float norm = pow(absSum, 1.0f / pnorm);
                multiply(src, 1.0f / norm, dst);
            }
            else
            {
                Mat norm;
                reduce(buffer, norm, 0, REDUCE_SUM);
                norm += epsilon;

                // compute inverted norm to call multiply instead divide
                cv::pow(norm, -1.0f / pnorm, norm);

                repeat(norm, numPlanes, 1, buffer);
                multiply(src, buffer, dst);
            }

            if (!blobs.empty())
            {
                // scale the output
                Mat scale = blobs[0];
                if (scale.total() == 1)
                {
                    // _scale: 1 x 1
                    dst *= scale.at<float>(0, 0);
                }
                else
                {
                    // _scale: _channels x 1
                    CV_Assert(scale.total() == numPlanes);
                    repeat(scale, 1, dst.cols, buffer);
                    multiply(dst, buffer, dst);
                }
            }
            inpData += numPlanes * planeSize;
            outData += numPlanes * planeSize;
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        if(pnorm != 1 && pnorm != 2)
            CV_Error(Error::StsNotImplemented, "Unsupported normalization mode");

        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
        auto input_shape = input_wrapper->getShape();

        NormalizeConfiguration<float> config;
        config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
        config.axis_start = clamp(startAxis, input_shape.size());
        config.axis_end = clamp(endAxis, input_shape.size()) + 1; /* +1 because NormalizeOp follows [start, end) convention */
        config.norm = pnorm;
        config.eps = epsilon;

        const auto& weightsMat = blobs.empty() ? Mat() : blobs[0];
        return make_cuda_node<cuda4dnn::NormalizeOp>(preferableTarget, std::move(context->stream), weightsMat, config);
    }
#endif

#ifdef HAVE_INF_ENGINE
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
        std::vector<size_t> dims = input->getDims();
        if (dims.size() == 4)
        {
            // 4D inputs map onto IE's Normalize primitive.
            InferenceEngine::Builder::NormalizeLayer ieLayer(name);

            ieLayer.setChannelShared(false);
            ieLayer.setAcrossMaps(acrossSpatial);
            ieLayer.setEpsilon(epsilon);

            InferenceEngine::Builder::Layer l = ieLayer;
            const int numChannels = dims[1];
            InferenceEngine::Blob::Ptr weights;
            if (blobs.empty())
            {
                // No learned scale: supply all-ones weights.
                weights = InferenceEngine::make_shared_blob<float>({
                              InferenceEngine::Precision::FP32,
                              {(size_t)numChannels}, InferenceEngine::Layout::C
                          });
                weights->allocate();

                Mat weightsMat = infEngineBlobToMat(weights).reshape(1, numChannels);
                Mat(numChannels, 1, CV_32F, Scalar(1)).copyTo(weightsMat);
                l.getParameters()["channel_shared"] = false;
            }
            else
            {
                CV_Assert(numChannels == blobs[0].total());
                weights = wrapToInfEngineBlob(blobs[0], {(size_t)numChannels}, InferenceEngine::Layout::C);
                l.getParameters()["channel_shared"] = blobs[0].total() == 1;
            }
            addConstantData("weights", weights, l);
            l.getParameters()["across_spatial"] = acrossSpatial;
            return Ptr<BackendNode>(new InfEngineBackendNode(l));
        }
        else
        {
            // Non-4D inputs fall back to IE's GRN (global response norm).
            InferenceEngine::Builder::GRNLayer ieLayer(name);
            ieLayer.setBeta(epsilon);

            InferenceEngine::Builder::Layer l = ieLayer;
            l.getParameters()["bias"] = epsilon;

            return Ptr<BackendNode>(new InfEngineBackendNode(l));
        }
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        const size_t batch = ieInpNode->get_shape()[0];
        const size_t numChannels = ieInpNode->get_shape()[1];

        // Normalize over the channel axis only, or over all axes.
        std::vector<int64_t> axes_data;
        if (!acrossSpatial) {
            axes_data.push_back(1);
        } else {
            axes_data.resize(ieInpNode->get_shape().size());
            std::iota(axes_data.begin(), axes_data.end(), 0);
        }
        auto axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{axes_data.size()}, axes_data);
        auto norm = std::make_shared<ngraph::op::NormalizeL2>(ieInpNode, axes, epsilon, ngraph::op::EpsMode::ADD);

        CV_Assert(blobs.empty() || numChannels == blobs[0].total());
        std::vector<size_t> shape(ieInpNode->get_shape().size(), 1);
        shape[0] = blobs.empty() ? 1 : batch;
        shape[1] = numChannels;
        std::shared_ptr<ngraph::op::Constant> weight;
        if (blobs.empty())
        {
            std::vector<float> ones(numChannels, 1);
            weight = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), ones.data());
        }
        else
        {
            // weight->get_shape().size() > 1 ~> channel_shared = false
            weight = std::make_shared<ngraph::op::Constant>(
                         ngraph::element::f32, ngraph::Shape(shape), blobs[0].data);
        }
        auto mul = std::make_shared<ngraph::op::v1::Multiply>(norm, weight, ngraph::op::AutoBroadcastType::NUMPY);
        return Ptr<BackendNode>(new InfEngineNgraphNode(mul));
    }
#endif  // HAVE_DNN_NGRAPH

private:
    int startAxis, endAxis;
};
// Factory: builds the concrete implementation and hands ownership to a
// reference-counted cv::Ptr.
Ptr<NormalizeBBoxLayer> NormalizeBBoxLayer::create(const LayerParams &params)
{
    Ptr<NormalizeBBoxLayer> layer(new NormalizeBBoxLayerImpl(params));
    return layer;
}
}
}

View File

@@ -0,0 +1,277 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Implementation of padding layer, which adds paddings to input blob.
*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <vector>
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/padding.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class PaddingLayerImpl CV_FINAL : public PaddingLayer
{
public:
PaddingLayerImpl(const LayerParams &params)
{
setParamsFrom(params);
paddingValue = params.get<float>("value", 0);
inputDims = params.get<int>("input_dims", -1);
paddingType = params.get<String>("type", "constant");
CV_Assert(params.has("paddings"));
const DictValue& paddingsParam = params.get("paddings");
CV_Assert((paddingsParam.size() & 1) == 0);
paddings.resize(paddingsParam.size() / 2);
for (int i = 0; i < paddings.size(); ++i)
{
paddings[i].first = paddingsParam.get<int>(i * 2); // Pad before.
paddings[i].second = paddingsParam.get<int>(i * 2 + 1); // Pad after.
CV_Assert_N(paddings[i].first >= 0, paddings[i].second >= 0);
}
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1);
const MatShape& inpShape = inputs[0];
CV_Assert(inpShape.size() >= paddings.size());
CV_Assert(inputDims == -1 || inpShape.size() == inputDims || inpShape.size() > paddings.size());
outputs.resize(1, inpShape);
int offset = (inputDims == -1 ? 0 : (inpShape.size() > inputDims ? 1 : 0));
for (int i = 0; i < paddings.size(); ++i)
{
outputs[0][offset + i] = inpShape[offset + i] + paddings[i].first + paddings[i].second;
}
return false;
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> inputs;
inputs_arr.getMatVector(inputs);
// Compute dstRanges.
const MatSize& inpShape = inputs[0].size;
if (inputDims != -1 && inputs[0].dims != inputDims)
{
paddings.insert(paddings.begin(), std::make_pair(0, 0));
}
dstRanges.resize(paddings.size());
for (int i = 0; i < paddings.size(); ++i)
{
dstRanges[i].start = paddings[i].first;
dstRanges[i].end = paddings[i].first + inpShape[i];
}
// Add the rest of dimensions.
for (int i = dstRanges.size(); i < inputs[0].dims; ++i)
{
dstRanges.push_back(Range::all());
paddings.push_back(std::make_pair(0, 0));
}
inputDims = -1; // Next time paddings are filled for all the dimensions.
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) &&
(preferableTarget != DNN_TARGET_MYRIAD ||
(dstRanges.size() == 4 && paddings[0].first == 0 && paddings[0].second == 0));
#endif
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4);
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
if (paddingType == "constant")
{
if (inputs_arr.depth() == CV_16S)
{
std::vector<float> paddingValue_fp32(1, paddingValue);
std::vector<int16_t> paddingValue_fp16(1);
cv::convertFp16(paddingValue_fp32, paddingValue_fp16);
outputs[0].setTo(paddingValue_fp16[0]);
}
else
outputs[0].setTo(paddingValue);
inputs[0].copyTo(outputs[0](dstRanges));
}
else if (paddingType == "reflect")
{
CV_Assert(inputs.size() == 1);
CV_Assert(outputs.size() == 1);
CV_Assert(inputs[0].dims == 4);
CV_Assert(outputs[0].dims == 4);
if (inputs[0].size[0] != outputs[0].size[0] || inputs[0].size[1] != outputs[0].size[1])
CV_Error(Error::StsNotImplemented, "Only spatial reflection padding is supported.");
const int inpHeight = inputs[0].size[2];
const int inpWidth = inputs[0].size[3];
const int outHeight = outputs[0].size[2];
const int outWidth = outputs[0].size[3];
const int padTop = dstRanges[2].start;
const int padBottom = outHeight - dstRanges[2].end;
const int padLeft = dstRanges[3].start;
const int padRight = outWidth - dstRanges[3].end;
CV_CheckLT(padTop, inpHeight, ""); CV_CheckLT(padBottom, inpHeight, "");
CV_CheckLT(padLeft, inpWidth, ""); CV_CheckLT(padRight, inpWidth, "");
for (size_t n = 0; n < inputs[0].size[0]; ++n)
{
for (size_t ch = 0; ch < inputs[0].size[1]; ++ch)
{
copyMakeBorder(getPlane(inputs[0], n, ch),
getPlane(outputs[0], n, ch),
padTop, padBottom, padLeft, padRight,
BORDER_REFLECT_101);
}
}
}
else
CV_Error(Error::StsNotImplemented, "Unknown padding type: " + paddingType);
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
cuda4dnn::PaddingType ptype;
if (paddingType == "constant")
ptype = PaddingType::CONSTANT;
else if (paddingType == "reflect")
ptype = PaddingType::REFLECTION101;
else
CV_Error(Error::StsNotImplemented, "Unsupported padding mode");
return make_cuda_node<cuda4dnn::PaddingOp>(preferableTarget, std::move(context->stream), ptype, paddingValue, dstRanges);
}
#endif
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
#ifdef HAVE_HALIDE
int inW, inH, inC, inN;
int minN = std::max(dstRanges[0].start, 0);
int minC = std::max(dstRanges[1].start, 0);
int minY = std::max(dstRanges[2].start, 0);
int minX = std::max(dstRanges[3].start, 0);
Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
Halide::Var x("x"), y("y"), c("c"), n("n");
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
Halide::Func padded =
Halide::BoundaryConditions::constant_exterior(inputBuffer, paddingValue);
top(x, y, c, n) = padded(x - minX, y - minY, c - minC, n - minN);
return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
return Ptr<BackendNode>();
}
#ifdef HAVE_INF_ENGINE
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
InferenceEngine::Builder::Layer ieLayer(name);
ieLayer.setName(name);
ieLayer.setType("Pad");
std::vector<int> begins(paddings.size(), 0), ends(paddings.size(), 0);
for (int i = 0; i < paddings.size(); ++i)
{
begins[i] = paddings[i].first;
ends[i] = paddings[i].second;
}
ieLayer.getParameters()["pads_begin"] = begins;
ieLayer.getParameters()["pads_end"] = ends;
ieLayer.getParameters()["pad_mode"] = paddingType;
if (paddingType == "constant")
ieLayer.getParameters()["pad_value"] = paddingValue;
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif
#ifdef HAVE_DNN_NGRAPH
    // Builds an nGraph Pad node. For "constant" mode the fill value is passed
    // as a scalar Constant; any other mode is mapped to REFLECT (SYMMETRIC is
    // noted but not distinguished here).
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        std::vector<int64_t> begins(paddings.size(), 0), ends(paddings.size(), 0);
        // size_t index: paddings.size() is unsigned, avoids a sign-compare warning.
        for (size_t i = 0; i < paddings.size(); ++i)
        {
            begins[i] = static_cast<int64_t>(paddings[i].first);
            ends[i] = static_cast<int64_t>(paddings[i].second);
        }
        auto padding_below = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{begins.size()}, begins.data());
        auto padding_above = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{ends.size()}, ends.data());
        auto pad_mode = paddingType == "constant" ? ngraph::op::PadMode::CONSTANT : ngraph::op::PadMode::REFLECT; // SYMMETRIC
        auto arg_pad_value = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{}, &paddingValue);
        // Constant mode requires the 5-argument Pad overload with the fill value.
        auto pad = paddingType == "constant" ?
            std::make_shared<ngraph::op::v1::Pad>(ieInpNode, padding_below, padding_above, arg_pad_value, pad_mode) :
            std::make_shared<ngraph::op::v1::Pad>(ieInpNode, padding_below, padding_above, pad_mode);
        return Ptr<BackendNode>(new InfEngineNgraphNode(pad));
    }
#endif
private:
    std::vector<std::pair<int, int> > paddings;  // Per-axis (pad before, pad after) amounts.
    std::vector<Range> dstRanges;                // Destination ranges of the copied input inside the padded output.
    int inputDims;                               // Expected input rank; used to align `paddings` with actual dims.
    float paddingValue;                          // Fill value for "constant" padding mode.
    std::string paddingType;                     // Padding mode name, e.g. "constant" or "reflect".
};
// Factory: constructs the padding layer implementation from serialized params.
Ptr<PaddingLayer> PaddingLayer::create(const LayerParams &params)
{
    return makePtr<PaddingLayerImpl>(params);
}
}
}

View File

@@ -0,0 +1,450 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include "../op_vkcom.hpp"
#include <float.h>
#include <algorithm>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/permute.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class PermuteLayerImpl CV_FINAL : public PermuteLayer
{
public:
void checkNeedForPermutation()
{
_needsPermute = false;
for (size_t i = 0; i < _numAxes; ++i)
{
if (_order[i] != i)
{
_needsPermute = true;
break;
}
}
}
PermuteLayerImpl(const LayerParams &params)
: _count(0), _needsPermute(false), _numAxes(0)
{
if (!params.has("order"))
{
return;
}
DictValue paramOrder = params.get("order");
_numAxes = paramOrder.size();
for (size_t i = 0; i < _numAxes; i++)
{
int currentOrder = paramOrder.get<int>(i);
if (currentOrder < 0 || currentOrder > _numAxes)
{
CV_Error(Error::StsBadArg,
format("Orders of dimensions in Permute layer parameter"
"must be in [0...%zu]", _numAxes - 1));
}
if (std::find(_order.begin(), _order.end(), currentOrder) != _order.end())
{
CV_Error(Error::StsBadArg,
"Permute layer parameter contains duplicated orders.");
}
_order.push_back(currentOrder);
}
setParamsFrom(params);
checkNeedForPermutation();
}
    // Reports which inference backends can execute this layer.
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine()) ||
               (backendId == DNN_BACKEND_VKCOM && haveVulkan());
    }
    // Computes output shapes: each output is the input shape with its axes
    // reordered by _order. Returns false (outputs do not alias inputs) unless
    // the permutation is the identity.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        if(!_needsPermute)
        {
            // Identity: defer to the base class (in-place pass-through shapes).
            Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
            return true;
        }

        CV_Assert(inputs.size() > 0);
        CV_Assert((int)_numAxes == inputs[0].size());

        MatShape shapeBefore = inputs[0], shapeAfter;
        for (size_t i = 0; i < _numAxes; i++)
        {
            // Output dim i takes the size of input dim _order[i].
            shapeAfter.push_back(shapeBefore[_order[i]]);
        }

        outputs.clear();

        for (size_t i = 0; i < inputs.size(); i++)
        {
            // A permutation never changes the total element count.
            CV_Assert(total(inputs[i]) == total(shapeAfter));
            outputs.push_back(shapeAfter);
        }

        return false;
    }
    // Precomputes row-major strides for the input and output shapes; used by
    // the generic (non-4D) permutation loop in forward(). Also caches the total
    // element count in _count.
    void computeStrides(const MatShape &shapeBefore, const MatShape &shapeAfter)
    {
        _oldStride.resize(_numAxes);
        _newStride.resize(_numAxes);

        // Innermost axis has stride 1; outer strides accumulate right-to-left.
        _oldStride[_numAxes - 1] = 1;
        _newStride[_numAxes - 1] = 1;

        for(int i = _numAxes - 2; i >= 0; i--)
        {
            _oldStride[i] = _oldStride[i + 1] * shapeBefore[i + 1];
            _newStride[i] = _newStride[i + 1] * shapeAfter[i + 1];
        }

        _count = _oldStride[0] * shapeBefore[0];
    }
    // Called once shapes are known: recomputes strides and (for OpenCL) uploads
    // the order/stride tables to device-side UMats used by the kernel.
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        if(!_needsPermute)
        {
            return;  // Identity permutation: nothing to precompute.
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        CV_Assert(inputs.size() > 0);
        const Mat& inp0 = inputs[0];
        CV_Assert((int)_numAxes == inp0.dims);

        computeStrides(shape(inputs[0]), shape(outputs[0]));

#ifdef HAVE_OPENCL
        // Upload lookup tables only once; they are shape-dependent and
        // finalize() reruns whenever shapes change.
        if (uorder.empty())
        {
            // Convert size_t vectors to int, since the OpenCL kernel reads int32 buffers.
            std::vector<int> orderVec(_order.begin(), _order.end());;
            Mat morder(1, orderVec.size(), CV_32SC1, &orderVec[0]);

            std::vector<int> oldStrideVec(_oldStride.begin(), _oldStride.end());
            Mat mold_stride(1, _oldStride.size(), CV_32SC1, &oldStrideVec[0]);

            std::vector<int> newStrideVec(_newStride.begin(), _newStride.end());
            Mat mnew_stride(1, newStrideVec.size(), CV_32SC1, &newStrideVec[0]);

            morder.copyTo(uorder);
            mold_stride.copyTo(uold_stride);
            mnew_stride.copyTo(unew_stride);
        }
#endif
    }
    // Parallel 4D permutation kernel. Each worker handles a contiguous stripe of
    // output "rows" (all but the innermost output axis) and gathers input
    // elements through the permuted strides. Only valid for numAxes == 4 and
    // CV_32F data (esz is hard-coded to sizeof(float)).
    class PermuteInvoker : public ParallelLoopBody
    {
    public:
        const Mat* inp;
        Mat* out;
        const std::vector<size_t>* order;  // Output-axis -> input-axis mapping.
        int nstripes;

        static void run(const Mat& inp, Mat& out, const std::vector<size_t>& order, int nstripes)
        {
            PermuteInvoker p;
            p.inp = &inp;
            p.out = &out;
            p.order = &order;
            p.nstripes = nstripes;

            // Sanity check: each output dim equals the mapped input dim.
            CV_Assert( out.size[0] == inp.size[order[0]] &&
                      out.size[1] == inp.size[order[1]] &&
                      out.size[2] == inp.size[order[2]] &&
                      out.size[3] == inp.size[order[3]]);

            parallel_for_(Range(0, nstripes), p, nstripes);
        }

        PermuteInvoker() : inp(0), out(0), order(0), nstripes(0) {}

        void operator()(const Range& r) const CV_OVERRIDE
        {
            int n0 = out->size[0], n1 = out->size[1], n2 = out->size[2], n3 = out->size[3];

            // A "row" is one run of the innermost output axis; stripes split rows.
            size_t orows = (size_t)n0*n1*n2;
            size_t stripeSize = (orows + nstripes - 1)/nstripes;
            size_t stripeStart = r.start*stripeSize;
            size_t stripeEnd = std::min(r.end*stripeSize, orows);

            const size_t esz = sizeof(float);
            size_t ostep0 = out->step[0]/esz, ostep1 = out->step[1]/esz, ostep2 = out->step[2]/esz;
            const size_t* ord = &order->at(0);
            // Input steps reordered so index k walks output axis k.
            size_t istep0 = inp->step[ord[0]]/esz, istep1 = inp->step[ord[1]]/esz,
            istep2 = inp->step[ord[2]]/esz, istep3 = inp->step[ord[3]]/esz;

            // Decompose the flat row index into (i0, i1, i2) output coordinates.
            size_t val = stripeStart;
            int i2 = (int)(val % n2);
            val /= n2;
            int i1 = (int)(val % n1);
            int i0 = (int)(val / n1);

            const float* inptr_orig = inp->ptr<float>();
            float* outptr_orig = out->ptr<float>();

            for( size_t ofs = stripeStart; ofs < stripeEnd; ofs++ )
            {
                const float* inptr = inptr_orig + i0*istep0 + i1*istep1 + i2*istep2;
                float* outptr = outptr_orig + i0*ostep0 + i1*ostep1 + i2*ostep2;

                // Innermost axis: contiguous writes, strided reads.
                for( int i3 = 0; i3 < n3; i3++ )
                    outptr[i3] = inptr[i3*istep3];

                // Odometer-style increment of (i0, i1, i2).
                if( ++i2 >= n2 )
                {
                    i2 = 0;
                    if( ++i1 >= n1 )
                    {
                        i1 = 0;
                        if( ++i0 >= n0 )
                            break;
                    }
                }
            }
        }
    };
#ifdef HAVE_OPENCL
    // OpenCL path: one "permute" kernel launch per input blob, driven by the
    // order/stride tables uploaded in finalize(). Returns false to fall back to
    // the CPU path (e.g. identity permutation or kernel failure).
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        if (!_needsPermute)
            return false;

        // Kernel is compiled for either half or float element type.
        bool use_half = (inps.depth() == CV_16S);
        String opts = format("-DDtype=%s", use_half ? "half" : "float");
        for (size_t i = 0; i < inputs.size(); i++)
        {
            ocl::Kernel kernel("permute", ocl::dnn::permute_oclsrc, opts);

            kernel.set(0, (int)_count);
            kernel.set(1, ocl::KernelArg::PtrReadOnly(inputs[i]));
            kernel.set(2, ocl::KernelArg::PtrReadOnly(uorder));
            kernel.set(3, ocl::KernelArg::PtrReadOnly(uold_stride));
            kernel.set(4, ocl::KernelArg::PtrReadOnly(unew_stride));
            kernel.set(5, (int)_numAxes);
            kernel.set(6, ocl::KernelArg::PtrWriteOnly(outputs[i]));

            if (!kernel.run(1, &_count, NULL, false))
                return false;
        }

        return true;
    }
#endif
    // CPU forward pass. Identity order is a plain copy; 4D tensors use the
    // parallel PermuteInvoker; any other rank uses a generic stride-decoding
    // loop over every element.
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        // fp16 blobs are handled by converting to fp32 in the fallback.
        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        size_t k, ninputs = inputs.size();
        if(!_needsPermute)
        {
            for (k = 0; k < ninputs; k++)
            {
                CV_Assert(outputs[k].total() == inputs[k].total());
                // Skip the copy when the output aliases the input.
                if (outputs[k].data != inputs[k].data)
                    inputs[k].copyTo(outputs[k]);
            }
        }
        else
        {
            size_t i, j, count = _count, numAxes = _numAxes;
            const size_t* newStride = &_newStride[0];
            const size_t* oldStride = &_oldStride[0];
            const size_t* order = &_order[0];

            for (k = 0; k < ninputs; k++)
            {
                const Mat& inp = inputs[k];
                Mat& out = outputs[k];

                // All inputs must share shape; strides were computed from inputs[0].
                CV_Assert(inp.dims == numAxes && inp.size == inputs[0].size);
                CV_Assert(out.dims == numAxes && out.size == outputs[0].size);

                CV_Assert(inp.isContinuous() && out.isContinuous());
                CV_Assert(inp.type() == CV_32F && out.type() == CV_32F);

                if( numAxes == 4 )
                {
                    int nstripes = getNumThreads();
                    PermuteInvoker::run(inp, out, _order, nstripes);
                }
                else
                {
                    const float *srcData = inp.ptr<float>();
                    float *dstData = out.ptr<float>();

                    for (i = 0; i < count; ++i)
                    {
                        // Decode output index i into per-axis coordinates and
                        // re-encode them with the permuted input strides.
                        size_t oldPosition = 0;
                        size_t newPosition = i;

                        for (j = 0; j < numAxes; ++j)
                        {
                            oldPosition += (newPosition / newStride[j]) * oldStride[order[j]];
                            newPosition %= newStride[j];
                        }
                        dstData[i] = srcData[oldPosition];
                    }
                }
            }
        }
    }
#ifdef HAVE_CUDA
    // CUDA backend: delegates the permutation to the cuda4dnn PermuteOp,
    // passing only the axis order; shapes come from the wrappers at runtime.
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        return make_cuda_node<cuda4dnn::PermuteOp>(preferableTarget, std::move(context->stream), _order);
    }
#endif
    // Vulkan backend: wraps the axis order into an OpPermute node; returns an
    // empty node when Vulkan support is compiled out.
    virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
    {
#ifdef HAVE_VULKAN
        CV_Assert(!_order.empty());
        std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPermute(_order));
        return Ptr<BackendNode>(new VkComBackendNode(input, op));
#endif // HAVE_VULKAN
        return Ptr<BackendNode>();
    }
#ifdef HAVE_INF_ENGINE
    // Inference Engine (NN Builder API) backend: maps directly to a Permute layer.
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        InferenceEngine::Builder::PermuteLayer ieLayer(name);
        ieLayer.setOrder(_order);
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_INF_ENGINE
#ifdef HAVE_DNN_NGRAPH
    // nGraph backend: a permutation is expressed as Transpose with the axis
    // order supplied as an i64 constant.
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        auto tr_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
                       ngraph::Shape({_order.size()}), _order.data());
        auto transpose = std::make_shared<ngraph::op::Transpose>(ieInpNode, tr_axes);
        return Ptr<BackendNode>(new InfEngineNgraphNode(transpose));
    }
#endif  // HAVE_DNN_NGRAPH
    size_t _count;                          // Total number of elements per blob (cached by computeStrides).
    std::vector<size_t> _order;             // Output-axis -> input-axis permutation.

    std::vector<int> _oldDimensionSize;     // NOTE(review): appears unused in this implementation — candidate for removal, verify against full file.
    std::vector<int> _newDimensionSize;     // NOTE(review): appears unused in this implementation — candidate for removal, verify against full file.

    std::vector<size_t> _oldStride;         // Row-major strides of the input shape.
    std::vector<size_t> _newStride;         // Row-major strides of the output shape.
    bool _needsPermute;                     // False when _order is the identity.

#ifdef HAVE_OPENCL
    UMat uorder, uold_stride, unew_stride;  // Device-side int32 copies of the tables above.
#endif

    size_t _numAxes;                        // Rank of the tensors this layer permutes.
};
// Factory: constructs the permute layer implementation from serialized params.
Ptr<PermuteLayer> PermuteLayer::create(const LayerParams &params)
{
    return makePtr<PermuteLayerImpl>(params);
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,757 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#ifdef HAVE_DNN_NGRAPH
#include "../ie_ngraph.hpp"
#include <ngraph/op/experimental/layers/prior_box.hpp>
#include <ngraph/op/experimental/layers/prior_box_clustered.hpp>
#endif
#include "../op_vkcom.hpp"
#include <float.h>
#include <algorithm>
#include <cmath>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/prior_box.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class PriorBoxLayerImpl CV_FINAL : public PriorBoxLayer
{
public:
static bool getParameterDict(const LayerParams &params,
const std::string &parameterName,
DictValue& result)
{
if (!params.has(parameterName))
{
return false;
}
result = params.get(parameterName);
return true;
}
    // Reads element `idx` of the named parameter. When the parameter is absent:
    // raises StsBadArg if `required`, otherwise returns `defaultValue`.
    template<typename T>
    T getParameter(const LayerParams &params,
                   const std::string &parameterName,
                   const size_t &idx=0,
                   const bool required=true,
                   const T& defaultValue=T())
    {
        DictValue dictValue;
        bool success = getParameterDict(params, parameterName, dictValue);
        if(!success)
        {
            if(required)
            {
                // Build a descriptive error naming the layer and the parameter.
                std::string message = _layerName;
                message += " layer parameter does not contain ";
                message += parameterName;
                message += " parameter.";
                CV_Error(Error::StsBadArg, message);
            }
            else
            {
                return defaultValue;
            }
        }
        return dictValue.get<T>(idx);
    }
void getAspectRatios(const LayerParams &params)
{
DictValue aspectRatioParameter;
bool aspectRatioRetieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter);
if (!aspectRatioRetieved)
return;
for (int i = 0; i < aspectRatioParameter.size(); ++i)
{
float aspectRatio = aspectRatioParameter.get<float>(i);
bool alreadyExists = fabs(aspectRatio - 1.f) < 1e-6f;
for (size_t j = 0; j < _aspectRatios.size() && !alreadyExists; ++j)
{
alreadyExists = fabs(aspectRatio - _aspectRatios[j]) < 1e-6;
}
if (!alreadyExists)
{
_aspectRatios.push_back(aspectRatio);
if (_flip)
{
_aspectRatios.push_back(1./aspectRatio);
}
}
}
}
static void getParams(const std::string& name, const LayerParams &params,
std::vector<float>* values)
{
DictValue dict;
if (getParameterDict(params, name, dict))
{
values->resize(dict.size());
for (int i = 0; i < dict.size(); ++i)
{
(*values)[i] = dict.get<float>(i);
}
}
else
values->clear();
}
    // Parses the mandatory "variance" parameter: either exactly 4 positive
    // values (one per box coordinate), a single positive value, or — when the
    // list is empty — the default 0.1.
    void getVariance(const LayerParams &params)
    {
        DictValue varianceParameter;
        bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);
        CV_Assert(varianceParameterRetrieved);

        int varianceSize = varianceParameter.size();
        if (varianceSize > 1)
        {
            // Must and only provide 4 variance.
            CV_Assert(varianceSize == 4);

            for (int i = 0; i < varianceSize; ++i)
            {
                float variance = varianceParameter.get<float>(i);
                CV_Assert(variance > 0);
                _variance.push_back(variance);
            }
        }
        else
        {
            if (varianceSize == 1)
            {
                float variance = varianceParameter.get<float>(0);
                CV_Assert(variance > 0);
                _variance.push_back(variance);
            }
            else
            {
                // Set default to 0.1.
                _variance.push_back(0.1f);
            }
        }
    }
    // Parses all PriorBox parameters and precomputes the per-location box
    // widths/heights (_boxWidths/_boxHeights) either from explicit
    // width/height lists or from min/max sizes combined with aspect ratios.
    PriorBoxLayerImpl(const LayerParams &params)
    {
        setParamsFrom(params);
        _flip = getParameter<bool>(params, "flip", 0, false, true);
        _clip = getParameter<bool>(params, "clip", 0, false, true);
        _bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);

        getParams("min_size", params, &_minSize);
        getAspectRatios(params);
        getVariance(params);

        if (params.has("max_size"))
        {
            getParams("max_size", params, &_maxSize);
            // max_size pairs with min_size element-wise and must dominate it.
            CV_Assert(_minSize.size() == _maxSize.size());
            for (int i = 0; i < _maxSize.size(); i++)
                CV_Assert(_minSize[i] < _maxSize[i]);
        }

        std::vector<float> widths, heights;
        getParams("width", params, &widths);
        getParams("height", params, &heights);
        _explicitSizes = !widths.empty();
        CV_Assert(widths.size() == heights.size());

        if (_explicitSizes)
        {
            // Explicit (clustered) sizes are mutually exclusive with
            // min/max-size + aspect-ratio generation.
            CV_Assert(_aspectRatios.empty());
            CV_Assert(!params.has("min_size"));
            CV_Assert(!params.has("max_size"));
            _boxWidths = widths;
            _boxHeights = heights;
        }
        else
        {
            CV_Assert(!_minSize.empty());
            for (int i = 0; i < _minSize.size(); ++i)
            {
                float minSize = _minSize[i];
                CV_Assert(minSize > 0);
                // First prior: square box of the min size.
                _boxWidths.push_back(minSize);
                _boxHeights.push_back(minSize);

                if (_maxSize.size() > 0)
                {
                    // Second prior: square box with size sqrt(min * max).
                    float size = sqrt(minSize * _maxSize[i]);
                    _boxWidths.push_back(size);
                    _boxHeights.push_back(size);
                }

                // rest of priors
                for (size_t r = 0; r < _aspectRatios.size(); ++r)
                {
                    float arSqrt = sqrt(_aspectRatios[r]);
                    _boxWidths.push_back(minSize * arSqrt);
                    _boxHeights.push_back(minSize / arSqrt);
                }
            }
        }
        CV_Assert(_boxWidths.size() == _boxHeights.size());
        _numPriors = _boxWidths.size();

        // Steps: either separate step_h/step_w, a single step for both, or 0
        // meaning "derive from image/layer size ratio in finalize()".
        if (params.has("step_h") || params.has("step_w")) {
            CV_Assert(!params.has("step"));
            _stepY = getParameter<float>(params, "step_h");
            CV_Assert(_stepY > 0.);
            _stepX = getParameter<float>(params, "step_w");
            CV_Assert(_stepX > 0.);
        } else if (params.has("step")) {
            const float step = getParameter<float>(params, "step");
            CV_Assert(step > 0);
            _stepY = step;
            _stepX = step;
        } else {
            _stepY = 0;
            _stepX = 0;
        }
        if (params.has("offset_h") || params.has("offset_w"))
        {
            CV_Assert_N(!params.has("offset"), params.has("offset_h"), params.has("offset_w"));
            getParams("offset_h", params, &_offsetsY);
            getParams("offset_w", params, &_offsetsX);
            CV_Assert(_offsetsX.size() == _offsetsY.size());
            // Multiple offsets multiply the number of priors per location.
            _numPriors *= std::max((size_t)1, 2 * (_offsetsX.size() - 1));
        }
        else
        {
            // Single centered offset (default 0.5 places boxes at cell centers).
            float offset = getParameter<float>(params, "offset", 0, false, 0.5);
            _offsetsX.assign(1, offset);
            _offsetsY.assign(1, offset);
        }
    }
    // Reports backend support; the nGraph path needs either explicit sizes or
    // equal X/Y steps, and the IE builder path is limited to simple size configs.
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
#ifdef HAVE_DNN_NGRAPH
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
            return _explicitSizes || _stepX == _stepY;
#endif
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() &&
               ( _explicitSizes || (_minSize.size() == 1 && _maxSize.size() <= 1)))
               || (backendId == DNN_BACKEND_VKCOM && haveVulkan());
    }
    // Output is a single blob of shape 1 x 2 x (H*W*numPriors*4): channel 0
    // stores prior coordinates, channel 1 stores their variances.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(!inputs.empty());

        int layerHeight = inputs[0][2];
        int layerWidth = inputs[0][3];

        // Since all images in a batch has same height and width, we only need to
        // generate one set of priors which can be shared across all images.
        size_t outNum = 1;
        // 2 channels. First channel stores the mean of each prior coordinate.
        // Second channel stores the variance of each prior coordinate.
        size_t outChannels = 2;

        outputs.resize(1, shape(outNum, outChannels,
                                layerHeight * layerWidth * _numPriors * 4));

        return false;
    }
    // Resolves auto steps (step == 0) once both the feature-map (inputs[0]) and
    // image (inputs[1]) shapes are known: step becomes image_size / layer_size.
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);

        CV_CheckGT(inputs.size(), (size_t)1, "");
        CV_CheckEQ(inputs[0].dims, 4, ""); CV_CheckEQ(inputs[1].dims, 4, "");
        int layerWidth = inputs[0].size[3];
        int layerHeight = inputs[0].size[2];

        int imageWidth = inputs[1].size[3];
        int imageHeight = inputs[1].size[2];

        _stepY = _stepY == 0 ? (static_cast<float>(imageHeight) / layerHeight) : _stepY;
        _stepX = _stepX == 0 ? (static_cast<float>(imageWidth) / layerWidth) : _stepX;
    }
#ifdef HAVE_OPENCL
    // OpenCL path: generates priors with the "prior_box" kernel, optionally
    // clips them to [0, 1], then fills the variance channel. Returns false on
    // any kernel failure so the CPU path can take over.
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        bool use_half = (inps.depth() == CV_16S);
        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        int _layerWidth = inputs[0].size[3];
        int _layerHeight = inputs[0].size[2];

        int _imageWidth = inputs[1].size[3];
        int _imageHeight = inputs[1].size[2];

        // Upload the constant tables once; they depend only on layer params.
        if (umat_offsetsX.empty())
        {
            Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);
            Mat offsetsY(1, _offsetsY.size(), CV_32FC1, &_offsetsY[0]);
            Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);
            Mat widths(1, _boxWidths.size(), CV_32FC1, &_boxWidths[0]);
            Mat heights(1, _boxHeights.size(), CV_32FC1, &_boxHeights[0]);

            offsetsX.copyTo(umat_offsetsX);
            offsetsY.copyTo(umat_offsetsY);
            variance.copyTo(umat_variance);
            widths.copyTo(umat_widths);
            heights.copyTo(umat_heights);
        }

        String opts;
        if (use_half)
            opts = "-DDtype=half -DDtype4=half4 -Dconvert_T=convert_half4";
        else
            opts = "-DDtype=float -DDtype4=float4 -Dconvert_T=convert_float4";

        size_t nthreads = _layerHeight * _layerWidth;

        ocl::Kernel kernel("prior_box", ocl::dnn::prior_box_oclsrc, opts);
        kernel.set(0, (int)nthreads);
        kernel.set(1, (float)_stepX);
        kernel.set(2, (float)_stepY);
        kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
        kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
        kernel.set(5, (int)_offsetsX.size());
        kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_widths));
        kernel.set(7, ocl::KernelArg::PtrReadOnly(umat_heights));
        kernel.set(8, (int)_boxWidths.size());
        kernel.set(9, ocl::KernelArg::PtrWriteOnly(outputs[0]));
        kernel.set(10, (int)_layerHeight);
        kernel.set(11, (int)_layerWidth);
        kernel.set(12, (int)_imageHeight);
        kernel.set(13, (int)_imageWidth);
        // Fix: the launch result was previously ignored, unlike the sibling
        // kernels below — a failed launch would silently yield garbage priors.
        if (!kernel.run(1, &nthreads, NULL, false))
            return false;

        // clip the prior's coordinate such that it is within [0, 1]
        if (_clip)
        {
            ocl::Kernel kernel("clip", ocl::dnn::prior_box_oclsrc, opts);
            size_t nthreads = _layerHeight * _layerWidth * _numPriors * 4;
            if (!kernel.args((int)nthreads, ocl::KernelArg::PtrReadWrite(outputs[0]))
                .run(1, &nthreads, NULL, false))
                return false;
        }

        // set the variance.
        {
            ocl::Kernel kernel("set_variance", ocl::dnn::prior_box_oclsrc, opts);
            int offset = total(shape(outputs[0]), 2);
            size_t nthreads = _layerHeight * _layerWidth * _numPriors;
            kernel.set(0, (int)nthreads);
            kernel.set(1, (int)offset);
            kernel.set(2, (int)_variance.size());
            kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_variance));
            kernel.set(4, ocl::KernelArg::PtrWriteOnly(outputs[0]));
            if (!kernel.run(1, &nthreads, NULL, false))
                return false;
        }
        return true;
    }
#endif
    // CPU forward pass: writes one prior box (4 coords) per (cell, box shape,
    // offset) combination into channel 0, optionally clips to [0, 1], then
    // fills channel 1 with the variances.
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        CV_Assert(inputs.size() == 2);

        // inputs[0] is the feature map, inputs[1] the (preprocessed) image.
        int _layerWidth = inputs[0].size[3];
        int _layerHeight = inputs[0].size[2];

        int _imageWidth = inputs[1].size[3];
        int _imageHeight = inputs[1].size[2];

        float* outputPtr = outputs[0].ptr<float>();
        float _boxWidth, _boxHeight;
        for (size_t h = 0; h < _layerHeight; ++h)
        {
            for (size_t w = 0; w < _layerWidth; ++w)
            {
                for (size_t i = 0; i < _boxWidths.size(); ++i)
                {
                    _boxWidth = _boxWidths[i];
                    _boxHeight = _boxHeights[i];
                    for (int j = 0; j < _offsetsX.size(); ++j)
                    {
                        // Box center in image coordinates; addPrior advances
                        // the output pointer past the 4 written values.
                        float center_x = (w + _offsetsX[j]) * _stepX;
                        float center_y = (h + _offsetsY[j]) * _stepY;
                        outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
                                             _imageHeight, _bboxesNormalized, outputPtr);
                    }
                }
            }
        }
        // clip the prior's coordinate such that it is within [0, 1]
        if (_clip)
        {
            int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
            outputPtr = outputs[0].ptr<float>();
            for (size_t d = 0; d < _outChannelSize; ++d)
            {
                outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
            }
        }
        // set the variance.
        outputPtr = outputs[0].ptr<float>(0, 1);
        if(_variance.size() == 1)
        {
            // Single variance: broadcast it over the whole second channel.
            Mat secondChannel(1, outputs[0].size[2], CV_32F, outputPtr);
            secondChannel.setTo(Scalar::all(_variance[0]));
        }
        else
        {
            // Four variances: repeat the quadruple for every prior.
            int count = 0;
            for (size_t h = 0; h < _layerHeight; ++h)
            {
                for (size_t w = 0; w < _layerWidth; ++w)
                {
                    for (size_t i = 0; i < _numPriors; ++i)
                    {
                        for (int j = 0; j < 4; ++j)
                        {
                            outputPtr[count] = _variance[j];
                            ++count;
                        }
                    }
                }
            }
        }
    }
#ifdef HAVE_CUDA
    // CUDA backend: packs all precomputed parameters into a PriorBoxConfiguration
    // and delegates generation to the cuda4dnn PriorBoxOp.
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        auto feature_map_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
        auto feature_map_shape = feature_map_wrapper->getShape();

        auto image_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
        auto image_shape = image_wrapper->getShape();

        PriorBoxConfiguration config;
        // rbegin()[0]/[1] = last/second-to-last dims, i.e. width/height.
        config.feature_map_width = feature_map_shape.rbegin()[0];
        config.feature_map_height = feature_map_shape.rbegin()[1];
        config.image_width = image_shape.rbegin()[0];
        config.image_height = image_shape.rbegin()[1];

        config.num_priors = _numPriors;
        config.box_widths = _boxWidths;
        config.box_heights = _boxHeights;
        config.offsets_x = _offsetsX;
        config.offsets_y = _offsetsY;
        config.stepX = _stepX;
        config.stepY = _stepY;

        config.variance = _variance;

        config.clip = _clip;
        config.normalize = _bboxesNormalized;

        return make_cuda_node<cuda4dnn::PriorBoxOp>(preferableTarget, std::move(context->stream), config);
    }
#endif
    // Vulkan backend: forwards the precomputed tables to OpPriorBox; returns an
    // empty node when Vulkan support is compiled out.
    virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
    {
#ifdef HAVE_VULKAN
        std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPriorBox(_stepX, _stepY,
                                                                _clip, _numPriors,
                                                                _variance, _offsetsX,
                                                                _offsetsY, _boxWidths,
                                                                _boxHeights));
        return Ptr<BackendNode>(new VkComBackendNode(input, op));
#endif // HAVE_VULKAN
        return Ptr<BackendNode>();
    }
#ifdef HAVE_INF_ENGINE
    // Inference Engine (NN Builder API) backend: explicit sizes map to
    // PriorBoxClustered, otherwise to PriorBox with min/max sizes and aspect
    // ratios. Only a single shared X/Y offset is supported.
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        if (_explicitSizes)
        {
            InferenceEngine::Builder::PriorBoxClusteredLayer ieLayer(name);
            ieLayer.setSteps({_stepY, _stepX});

            CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
            ieLayer.setOffset(_offsetsX[0]);

            ieLayer.setClip(_clip);
            ieLayer.setFlip(false);  // We already flipped aspect ratios.

            InferenceEngine::Builder::Layer l = ieLayer;
            CV_Assert_N(!_boxWidths.empty(), !_boxHeights.empty(), !_variance.empty());
            CV_Assert(_boxWidths.size() == _boxHeights.size());
            l.getParameters()["width"] = _boxWidths;
            l.getParameters()["height"] = _boxHeights;
            l.getParameters()["variance"] = _variance;
            return Ptr<BackendNode>(new InfEngineBackendNode(l));
        }
        else
        {
            InferenceEngine::Builder::PriorBoxLayer ieLayer(name);

            CV_Assert(!_explicitSizes);
            ieLayer.setMinSize(_minSize[0]);
            if (!_maxSize.empty())
                ieLayer.setMaxSize(_maxSize[0]);

            CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
            ieLayer.setOffset(_offsetsX[0]);

            ieLayer.setClip(_clip);
            ieLayer.setFlip(false);  // We already flipped aspect ratios.

            InferenceEngine::Builder::Layer l = ieLayer;
            // IE distinguishes a single shared "step" from separate step_h/step_w;
            // zero out whichever representation is unused.
            if (_stepX == _stepY)
            {
                l.getParameters()["step"] = _stepX;
                l.getParameters()["step_h"] = 0.0f;
                l.getParameters()["step_w"] = 0.0f;
            }
            else
            {
                l.getParameters()["step"] = 0.0f;
                l.getParameters()["step_h"] = _stepY;
                l.getParameters()["step_w"] = _stepX;
            }
            if (!_aspectRatios.empty())
            {
                l.getParameters()["aspect_ratio"] = _aspectRatios;
            }
            CV_Assert(!_variance.empty());
            l.getParameters()["variance"] = _variance;
            return Ptr<BackendNode>(new InfEngineBackendNode(l));
        }
    }
#endif  // HAVE_INF_ENGINE
#ifdef HAVE_DNN_NGRAPH
    // nGraph backend: slices the H,W dims out of the feature-map and image
    // shapes (dims 2..3) and feeds them to PriorBoxClustered (explicit sizes)
    // or PriorBox (generated sizes), then unsqueezes to add the batch dim.
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        CV_Assert(nodes.size() == 2);
        auto layer = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        auto image = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
        auto layer_shape = std::make_shared<ngraph::op::ShapeOf>(layer);
        auto image_shape = std::make_shared<ngraph::op::ShapeOf>(image);

        // StridedSlice [2:4:1] extracts the (H, W) pair from an NCHW shape.
        auto lower_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{2});
        auto upper_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{4});
        auto strides      = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{1});
        auto slice_layer = std::make_shared<ngraph::op::v1::StridedSlice>(layer_shape,
                           lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
        auto slice_image = std::make_shared<ngraph::op::v1::StridedSlice>(image_shape,
                           lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});

        if (_explicitSizes)
        {
            CV_Assert_N(!_boxWidths.empty(), !_boxHeights.empty(), !_variance.empty());
            CV_Assert(_boxWidths.size() == _boxHeights.size());
            ngraph::op::PriorBoxClusteredAttrs attrs;
            attrs.widths = _boxWidths;
            attrs.heights = _boxHeights;
            attrs.clip = _clip;
            // Only a single shared X/Y offset is representable here.
            CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
            attrs.offset = _offsetsX[0];
            attrs.step_heights = _stepY;
            attrs.step_widths = _stepX;
            attrs.variances = _variance;

            auto priorBox = std::make_shared<ngraph::op::PriorBoxClustered>(slice_layer, slice_image, attrs);
            auto axis = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{0});
            auto unsqueeze = std::make_shared<ngraph::op::Unsqueeze>(priorBox, axis);
            return Ptr<BackendNode>(new InfEngineNgraphNode(unsqueeze));
        }
        else
        {
            ngraph::op::PriorBoxAttrs attrs;
            attrs.min_size = _minSize;
            attrs.max_size = _maxSize;
            // doesn't work with empty aspectRatio
            attrs.aspect_ratio = !_aspectRatios.empty()? _aspectRatios : std::vector<float>{1.0f};
            attrs.clip = _clip;
            attrs.flip = false;  // Ratios were already flipped in getAspectRatios().

            attrs.variance = _variance;
            CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
            attrs.offset = _offsetsX[0];

            attrs.step = _stepX;  // supportBackend() guarantees _stepX == _stepY here.
            attrs.scale_all_sizes = !_aspectRatios.empty();

            auto priorBox = std::make_shared<ngraph::op::PriorBox>(slice_layer, slice_image, attrs);
            auto axis = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{0});
            auto unsqueeze = std::make_shared<ngraph::op::Unsqueeze>(priorBox, axis);
            return Ptr<BackendNode>(new InfEngineNgraphNode(unsqueeze));
        }
    }
#endif  // HAVE_DNN_NGRAPH
// Estimates the layer cost: every spatial location of each input feature map
// produces _numPriors boxes with 4 coordinates each.
// Fixes: accumulate in int64 (the declared return type; plain `long` is only
// 32-bit on LLP64 targets) and use size_t for the index to avoid a
// signed/unsigned comparison against inputs.size().
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                       const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
    CV_UNUSED(outputs); // suppress unused variable warning
    int64 flops = 0;
    for (size_t i = 0; i < inputs.size(); i++)
    {
        flops += total(inputs[i], 2) * _numPriors * 4;
    }
    return flops;
}
private:
    std::vector<float> _minSize;      // box sizes for the non-clustered mode
    std::vector<float> _maxSize;
    float _stepX, _stepY;             // anchor grid step in pixels
    std::vector<float> _aspectRatios;
    std::vector<float> _variance;     // encoded-box variances written alongside priors
    std::vector<float> _offsetsX;     // sub-cell anchor center offsets
    std::vector<float> _offsetsY;
    // Precomputed final widths and heights based on aspect ratios or explicit sizes.
    std::vector<float> _boxWidths;
    std::vector<float> _boxHeights;
#ifdef HAVE_OPENCL
    // Device-side copies of the parameter vectors for the OpenCL path.
    UMat umat_offsetsX;
    UMat umat_offsetsY;
    UMat umat_widths;
    UMat umat_heights;
    UMat umat_variance;
#endif
    bool _flip;
    bool _clip;
    bool _explicitSizes;        // true when widths/heights were given explicitly
    bool _bboxesNormalized;     // output coordinates divided by image size
    size_t _numPriors;          // priors emitted per spatial location (see getFLOPS)
    static const size_t _numAxes = 4;
    static const std::string _layerName;
static float* addPrior(float center_x, float center_y, float width, float height,
float imgWidth, float imgHeight, bool normalized, float* dst)
{
if (normalized)
{
dst[0] = (center_x - width * 0.5f) / imgWidth; // xmin
dst[1] = (center_y - height * 0.5f) / imgHeight; // ymin
dst[2] = (center_x + width * 0.5f) / imgWidth; // xmax
dst[3] = (center_y + height * 0.5f) / imgHeight; // ymax
}
else
{
dst[0] = center_x - width * 0.5f; // xmin
dst[1] = center_y - height * 0.5f; // ymin
dst[2] = center_x + width * 0.5f - 1.0f; // xmax
dst[3] = center_y + height * 0.5f - 1.0f; // ymax
}
return dst + 4;
}
};
const std::string PriorBoxLayerImpl::_layerName = std::string("PriorBox");

// Public factory: wraps the concrete implementation behind the abstract type.
Ptr<PriorBoxLayer> PriorBoxLayer::create(const LayerParams &params)
{
    Ptr<PriorBoxLayer> layer(new PriorBoxLayerImpl(params));
    return layer;
}
}
}

View File

@@ -0,0 +1,441 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_inf_engine.hpp"
#ifdef HAVE_DNN_NGRAPH
#include "../ie_ngraph.hpp"
#include <ngraph/op/experimental/layers/proposal.hpp>
#endif
namespace cv { namespace dnn {
// Region Proposal Network proposal layer (Faster R-CNN style), composed from
// three internal layers: a PriorBox layer that generates anchors, Permute
// layers that reorder scores/deltas from NCHW to NHWC, and a DetectionOutput
// layer that decodes boxes, ranks them by score and applies NMS.
class ProposalLayerImpl CV_FINAL : public ProposalLayer
{
public:
    ProposalLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        featStride = params.get<uint32_t>("feat_stride", 16);
        baseSize = params.get<uint32_t>("base_size", 16);
        // uint32_t minSize = params.get<uint32_t>("min_size", 16);
        keepTopBeforeNMS = params.get<uint32_t>("pre_nms_topn", 6000);
        keepTopAfterNMS = params.get<uint32_t>("post_nms_topn", 300);
        nmsThreshold = params.get<float>("nms_thresh", 0.7);
        ratios = params.get("ratio");
        scales = params.get("scale");
        {
            // Anchor generator: one box per (ratio, scale) pair at each feature
            // map cell, in absolute (non-normalized) pixel coordinates.
            LayerParams lp;
            lp.set("step", featStride);
            lp.set("flip", false);
            lp.set("clip", false);
            lp.set("normalized_bbox", false);
            lp.set("offset", 0.5 * baseSize / featStride);
            // Unused values.
            float variance[] = {0.1f, 0.1f, 0.2f, 0.2f};
            lp.set("variance", DictValue::arrayReal<float*>(&variance[0], 4));
            // Compute widths and heights explicitly.
            std::vector<float> widths, heights;
            widths.reserve(ratios.size() * scales.size());
            heights.reserve(ratios.size() * scales.size());
            for (int i = 0; i < ratios.size(); ++i)
            {
                float ratio = ratios.get<float>(i);
                for (int j = 0; j < scales.size(); ++j)
                {
                    float scale = scales.get<float>(j);
                    float width = std::floor(baseSize / sqrt(ratio) + 0.5f);
                    float height = std::floor(width * ratio + 0.5f);
                    widths.push_back(scale * width);
                    heights.push_back(scale * height);
                }
            }
            lp.set("width", DictValue::arrayReal<float*>(&widths[0], widths.size()));
            lp.set("height", DictValue::arrayReal<float*>(&heights[0], heights.size()));
            priorBoxLayer = PriorBoxLayer::create(lp);
        }
        {
            // NCHW -> NHWC permutation, shared by scores and bbox deltas.
            int order[] = {0, 2, 3, 1};
            LayerParams lp;
            lp.set("order", DictValue::arrayInt<int*>(&order[0], 4));
            deltasPermute = PermuteLayer::create(lp);
            scoresPermute = PermuteLayer::create(lp);
        }
        {
            // Box decoding + score sorting + NMS stage.
            LayerParams lp;
            lp.set("code_type", "CENTER_SIZE");
            lp.set("num_classes", 1);
            lp.set("share_location", true);
            lp.set("background_label_id", 1); // We won't pass background scores so set it out of range [0, num_classes)
            lp.set("variance_encoded_in_target", true);
            lp.set("keep_top_k", keepTopAfterNMS);
            lp.set("top_k", keepTopBeforeNMS);
            lp.set("nms_threshold", nmsThreshold);
            lp.set("normalized_bbox", false);
            lp.set("clip", true);
            detectionOutputLayer = DetectionOutputLayer::create(lp);
        }
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        // IE backends are supported except on MYRIAD targets.
        return backendId == DNN_BACKEND_OPENCV ||
               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && preferableTarget != DNN_TARGET_MYRIAD);
    }

    // Computes output/internal shapes. Inputs: [scores, bboxDeltas, imInfo].
    // Outputs: Nx5 boxes (batch id + 4 coords) and Nx1 scores, N = post-NMS top-k.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        // We need to allocate the following blobs:
        // - output priors from PriorBoxLayer
        // - permuted priors
        // - permuted scores
        CV_Assert(inputs.size() == 3);
        const MatShape& scores = inputs[0];
        const MatShape& bboxDeltas = inputs[1];

        std::vector<MatShape> layerInputs, layerOutputs, layerInternals;

        // Prior boxes layer.
        layerInputs.assign(1, scores);
        priorBoxLayer->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);
        CV_Assert(layerOutputs.size() == 1);
        CV_Assert(layerInternals.empty());
        internals.push_back(layerOutputs[0]);

        // Scores permute layer.
        CV_Assert(scores.size() == 4);
        MatShape objectScores = scores;
        CV_Assert((scores[1] & 1) == 0);  // Number of channels is even.
        objectScores[1] /= 2;  // only the object (non-background) half is permuted
        layerInputs.assign(1, objectScores);
        scoresPermute->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);
        CV_Assert(layerOutputs.size() == 1);
        CV_Assert(layerInternals.empty());
        internals.push_back(layerOutputs[0]);

        // BBox predictions permute layer.
        layerInputs.assign(1, bboxDeltas);
        deltasPermute->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);
        CV_Assert(layerOutputs.size() == 1);
        CV_Assert(layerInternals.empty());
        internals.push_back(layerOutputs[0]);

        // Detections layer.
        internals.push_back(shape(1, 1, keepTopAfterNMS, 7));

        outputs.resize(2);
        outputs[0] = shape(keepTopAfterNMS, 5);
        outputs[1] = shape(keepTopAfterNMS, 1);
        return false;
    }

    // Finalizes the internal permute layers once input shapes are known.
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);
        std::vector<Mat> layerInputs;
        std::vector<Mat> layerOutputs;

        // Scores permute layer.
        Mat scores = getObjectScores(inputs[0]);
        layerInputs.assign(1, scores);
        layerOutputs.assign(1, Mat(shape(scores.size[0], scores.size[2],
                                         scores.size[3], scores.size[1]), CV_32FC1));
        scoresPermute->finalize(layerInputs, layerOutputs);

        // BBox predictions permute layer.
        const Mat& bboxDeltas = inputs[1];
        CV_Assert(bboxDeltas.dims == 4);
        layerInputs.assign(1, bboxDeltas);
        layerOutputs.assign(1, Mat(shape(bboxDeltas.size[0], bboxDeltas.size[2],
                                         bboxDeltas.size[3], bboxDeltas.size[1]), CV_32FC1));
        deltasPermute->finalize(layerInputs, layerOutputs);
    }

#ifdef HAVE_OPENCL
    // OpenCL forward path; mirrors forward() below with UMat buffers.
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        if (inputs_.depth() == CV_16S)
            return false;

        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);
        internals_.getUMatVector(internals);

        CV_Assert(inputs.size() == 3);
        CV_Assert(internals.size() == 4);
        const UMat& scores = inputs[0];
        const UMat& bboxDeltas = inputs[1];
        const UMat& imInfo = inputs[2];
        UMat& priorBoxes = internals[0];
        UMat& permuttedScores = internals[1];
        UMat& permuttedDeltas = internals[2];
        UMat& detections = internals[3];

        CV_Assert(imInfo.total() >= 2);
        // We've chosen the smallest data type because we need just a shape from it.
        // imInfo holds (rows, cols); a dummy image of that size supplies the
        // shape to the prior-box and detection-output sub-layers.
        Mat szMat;
        imInfo.copyTo(szMat);
        int rows = (int)szMat.at<float>(0);
        int cols = (int)szMat.at<float>(1);
        umat_fakeImageBlob.create(shape(1, 1, rows, cols), CV_8UC1);
        umat_fakeImageBlob.setTo(0);

        // Generate prior boxes.
        std::vector<UMat> layerInputs(2), layerOutputs(1, priorBoxes);
        layerInputs[0] = scores;
        layerInputs[1] = umat_fakeImageBlob;
        priorBoxLayer->forward(layerInputs, layerOutputs, internals);

        // Permute scores.
        layerInputs.assign(1, getObjectScores(scores));
        layerOutputs.assign(1, permuttedScores);
        scoresPermute->forward(layerInputs, layerOutputs, internals);

        // Permute deltas.
        layerInputs.assign(1, bboxDeltas);
        layerOutputs.assign(1, permuttedDeltas);
        deltasPermute->forward(layerInputs, layerOutputs, internals);

        // Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
        // output internally because of different number of objects after NMS.
        layerInputs.resize(4);
        layerInputs[0] = permuttedDeltas;
        layerInputs[1] = permuttedScores;
        layerInputs[2] = priorBoxes;
        layerInputs[3] = umat_fakeImageBlob;

        layerOutputs[0] = detections;
        detectionOutputLayer->forward(layerInputs, layerOutputs, internals);

        // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
        // equal to keepTopAfterNMS. We fill the rest by zeros.
        const int numDets = layerOutputs[0].total() / 7;
        CV_Assert(numDets <= keepTopAfterNMS);

        MatShape s = shape(numDets, 7);
        layerOutputs[0] = layerOutputs[0].reshape(1, s.size(), &s[0]);

        // The boxes.
        UMat dst = outputs[0].rowRange(0, numDets);
        layerOutputs[0].colRange(3, 7).copyTo(dst.colRange(1, 5));
        dst.col(0).setTo(0);  // First column are batch ids. Keep it zeros too.

        // The scores.
        dst = outputs[1].rowRange(0, numDets);
        layerOutputs[0].col(2).copyTo(dst);

        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs, internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        CV_Assert(inputs.size() == 3);
        CV_Assert(internals.size() == 4);
        const Mat& scores = inputs[0];
        const Mat& bboxDeltas = inputs[1];
        const Mat& imInfo = inputs[2];
        Mat& priorBoxes = internals[0];
        Mat& permuttedScores = internals[1];
        Mat& permuttedDeltas = internals[2];
        Mat& detections = internals[3];

        CV_Assert(imInfo.total() >= 2);
        // We've chosen the smallest data type because we need just a shape from it.
        fakeImageBlob.create(shape(1, 1, imInfo.at<float>(0), imInfo.at<float>(1)), CV_8UC1);

        // Generate prior boxes.
        std::vector<Mat> layerInputs(2), layerOutputs(1, priorBoxes);
        layerInputs[0] = scores;
        layerInputs[1] = fakeImageBlob;
        priorBoxLayer->forward(layerInputs, layerOutputs, internals);

        // Permute scores.
        layerInputs.assign(1, getObjectScores(scores));
        layerOutputs.assign(1, permuttedScores);
        scoresPermute->forward(layerInputs, layerOutputs, internals);

        // Permute deltas.
        layerInputs.assign(1, bboxDeltas);
        layerOutputs.assign(1, permuttedDeltas);
        deltasPermute->forward(layerInputs, layerOutputs, internals);

        // Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
        // output internally because of different number of objects after NMS.
        layerInputs.resize(4);
        layerInputs[0] = permuttedDeltas;
        layerInputs[1] = permuttedScores;
        layerInputs[2] = priorBoxes;
        layerInputs[3] = fakeImageBlob;

        layerOutputs[0] = detections;
        detectionOutputLayer->forward(layerInputs, layerOutputs, internals);

        // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
        // equal to keepTopAfterNMS. We fill the rest by zeros.
        const int numDets = layerOutputs[0].total() / 7;
        CV_Assert(numDets <= keepTopAfterNMS);

        // The boxes.
        layerOutputs[0] = layerOutputs[0].reshape(1, numDets);
        Mat dst = outputs[0].rowRange(0, numDets);
        layerOutputs[0].colRange(3, 7).copyTo(dst.colRange(1, 5));
        dst.col(0).setTo(0);  // First column are batch ids. Keep it zeros too.

        // The scores.
        dst = outputs[1].rowRange(0, numDets);
        layerOutputs[0].col(2).copyTo(dst);
    }

#ifdef HAVE_INF_ENGINE
    // Maps this layer onto the Inference Engine builder Proposal primitive.
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        InferenceEngine::Builder::ProposalLayer ieLayer(name);

        ieLayer.setBaseSize(baseSize);
        ieLayer.setFeatStride(featStride);
        ieLayer.setMinSize(16);
        ieLayer.setNMSThresh(nmsThreshold);
        ieLayer.setPostNMSTopN(keepTopAfterNMS);
        ieLayer.setPreNMSTopN(keepTopBeforeNMS);

        std::vector<float> scalesVec(scales.size());
        for (int i = 0; i < scales.size(); ++i)
            scalesVec[i] = scales.get<float>(i);
        ieLayer.setScale(scalesVec);

        std::vector<float> ratiosVec(ratios.size());
        for (int i = 0; i < ratios.size(); ++i)
            ratiosVec[i] = ratios.get<float>(i);
        ieLayer.setRatio(ratiosVec);

        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    // Maps this layer onto the nGraph Proposal op. The 2D im_info input is
    // reshaped to 1D as the op expects.
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        CV_Assert(nodes.size() == 3);
        ngraph::op::ProposalAttrs attr;
        attr.base_size     = baseSize;
        attr.nms_thresh    = nmsThreshold;
        attr.feat_stride   = featStride;
        attr.min_size      = 16;
        attr.pre_nms_topn  = keepTopBeforeNMS;
        attr.post_nms_topn = keepTopAfterNMS;

        std::vector<float> ratiosVec(ratios.size());
        for (int i = 0; i < ratios.size(); ++i)
            ratiosVec[i] = ratios.get<float>(i);
        attr.ratio = ratiosVec;

        std::vector<float> scalesVec(scales.size());
        for (int i = 0; i < scales.size(); ++i)
            scalesVec[i] = scales.get<float>(i);
        attr.scale = scalesVec;

        auto& class_probs  = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        auto& class_logits = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
        auto& image_shape  = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;

        CV_Assert_N(image_shape->get_shape().size() == 2, image_shape->get_shape().front() == 1);
        auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
                                                            ngraph::Shape{1},
                                                            std::vector<int64_t>{(int64_t)image_shape->get_shape().back()});
        auto reshape = std::make_shared<ngraph::op::v1::Reshape>(image_shape, shape, true);

        auto proposal = std::make_shared<ngraph::op::Proposal>(class_probs, class_logits, reshape, attr);
        return Ptr<BackendNode>(new InfEngineNgraphNode(proposal));
    }
#endif  // HAVE_DNN_NGRAPH

private:
    // A first half of channels are background scores. We need only a second one.
    static Mat getObjectScores(const Mat& m)
    {
        CV_Assert(m.dims == 4);
        CV_Assert(m.size[0] == 1);
        int channels = m.size[1];
        CV_Assert((channels & 1) == 0);
        return slice(m, Range::all(), Range(channels / 2, channels));
    }

#ifdef HAVE_OPENCL
    // UMat counterpart of getObjectScores: view of the second channel half.
    static UMat getObjectScores(const UMat& m)
    {
        CV_Assert(m.dims == 4);
        CV_Assert(m.size[0] == 1);
        int channels = m.size[1];
        CV_Assert((channels & 1) == 0);

        Range r = Range(channels / 2, channels);
        Range ranges[4] = { Range::all(), r, Range::all(), Range::all() };
        return m(&ranges[0]);
    }
#endif

    Ptr<PriorBoxLayer> priorBoxLayer;              // anchor generator
    Ptr<DetectionOutputLayer> detectionOutputLayer; // decode + NMS stage

    Ptr<PermuteLayer> deltasPermute;
    Ptr<PermuteLayer> scoresPermute;
    uint32_t keepTopBeforeNMS, keepTopAfterNMS, featStride, baseSize;
    Mat fakeImageBlob;   // dummy image carrying only the input image shape
    float nmsThreshold;
    DictValue ratios, scales;
#ifdef HAVE_OPENCL
    UMat umat_fakeImageBlob;
#endif
};
// Public factory: wraps the concrete implementation behind the abstract type.
Ptr<ProposalLayer> ProposalLayer::create(const LayerParams& params)
{
    Ptr<ProposalLayer> layer(new ProposalLayerImpl(params));
    return layer;
}
} // namespace dnn
} // namespace cv

View File

@@ -0,0 +1,550 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include <iostream>
#include <iterator>
#include <cmath>
#include <opencv2/dnn/shape_utils.hpp>
namespace cv
{
namespace dnn
{
template<typename Dtype>
static void tanh(const Mat &src, Mat &dst)
{
MatConstIterator_<Dtype> itSrc = src.begin<Dtype>();
MatIterator_<Dtype> itDst = dst.begin<Dtype>();
for (; itSrc != src.end<Dtype>(); itSrc++, itDst++)
*itDst = std::tanh(*itSrc);
}
//TODO: make utils method
// Type dispatcher: allocates dst to match src and applies the element-wise
// tanh for the supported floating point depths (CV_32F, CV_64F).
static void tanh(const Mat &src, Mat &dst)
{
    dst.create(src.dims, (const int*)src.size, src.type());
    const int depth = src.type();
    if (depth == CV_32F)
        tanh<float>(src, dst);
    else if (depth == CV_64F)
        tanh<double>(src, dst);
    else
        CV_Error(Error::StsUnsupportedFormat, "Function supports only floating point types");
}
// Element-wise logistic sigmoid: dst = 1 / (1 + exp(-src)).
// cv::exp allocates dst; the reciprocal is taken via pow(x, -1) so the
// whole computation stays in two whole-matrix operations.
static void sigmoid(const Mat &src, Mat &dst)
{
    cv::exp(-src, dst);
    cv::pow(1 + dst, -1, dst);
}
// CPU implementation of a (uni-directional) LSTM layer.
// Weights layout: blobs[0] = Wh (recurrent), blobs[1] = Wx (input), blobs[2] =
// bias, each with the 4 gates stacked along rows in (i, f, o, g) order;
// blobs[3..5] are optional peephole matrices.
class LSTMLayerImpl CV_FINAL : public LSTMLayer
{
    int numTimeStamps, numSamples;   // derived from input shape in finalize()
    bool allocated;

    MatShape outTailShape;  //shape of single output sample
    MatShape outTsShape;    //shape of N output samples

    bool useTimestampDim;     // first input axis is the time axis
    bool produceCellOutput;   // also emit cell state c_t as a second output
    float forgetBias, cellClip;
    bool useCellClip, usePeephole;
    bool reverse;  // If true, go in negative direction along the time axis

public:
    LSTMLayerImpl(const LayerParams& params)
        : numTimeStamps(0), numSamples(0)
    {
        setParamsFrom(params);

        if (!blobs.empty())
        {
            CV_Assert(blobs.size() >= 3);

            blobs[2] = blobs[2].reshape(1, 1);  // bias as a 1-row matrix

            const Mat& Wh = blobs[0];
            const Mat& Wx = blobs[1];
            const Mat& bias = blobs[2];
            CV_Assert(Wh.dims == 2 && Wx.dims == 2);
            CV_Assert(Wh.rows == Wx.rows);
            CV_Assert(Wh.rows == 4*Wh.cols);  // 4 gates stacked along rows
            CV_Assert(Wh.rows == (int)bias.total());
            CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type());

            // Peephole weights.
            if (blobs.size() > 3)
            {
                CV_Assert(blobs.size() == 6);
                const int N = Wh.cols;
                for (int i = 3; i < 6; ++i)
                {
                    CV_Assert(blobs[i].rows == N && blobs[i].cols == N);
                    CV_Assert(blobs[i].type() == bias.type());
                }
            }
        }

        useTimestampDim = params.get<bool>("use_timestamp_dim", true);
        produceCellOutput = params.get<bool>("produce_cell_output", false);
        forgetBias = params.get<float>("forget_bias", 0.0f);
        cellClip = params.get<float>("cell_clip", 0.0f);
        useCellClip = params.get<bool>("use_cell_clip", false);
        usePeephole = params.get<bool>("use_peephole", false);
        reverse = params.get<bool>("reverse", false);

        allocated = false;
        outTailShape.clear();
    }

    void setUseTimstampsDim(bool use) CV_OVERRIDE
    {
        CV_Assert(!allocated);  // only configurable before finalize()
        useTimestampDim = use;
    }

    void setProduceCellOutput(bool produce) CV_OVERRIDE
    {
        CV_Assert(!allocated);
        produceCellOutput = produce;
    }

    void setOutShape(const MatShape &outTailShape_) CV_OVERRIDE
    {
        CV_Assert(!allocated || total(outTailShape) == total(outTailShape_));
        outTailShape = outTailShape_;
    }

    // Installs weight blobs; same shape contract as the constructor checks.
    void setWeights(const Mat &Wh, const Mat &Wx, const Mat &bias) CV_OVERRIDE
    {
        CV_Assert(Wh.dims == 2 && Wx.dims == 2);
        CV_Assert(Wh.rows == Wx.rows);
        CV_Assert(Wh.rows == 4*Wh.cols);
        CV_Assert(Wh.rows == (int)bias.total());
        CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type());

        blobs.resize(3);
        blobs[0] = Mat(Wh.clone());
        blobs[1] = Mat(Wx.clone());
        blobs[2] = Mat(bias.clone()).reshape(1, 1);
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert((!usePeephole && blobs.size() == 3) || (usePeephole && blobs.size() == 6));
        CV_Assert(inputs.size() == 1);
        const MatShape& inp0 = inputs[0];

        const Mat &Wh = blobs[0], &Wx = blobs[1];
        int _numOut = Wh.size[1];
        int _numInp = Wx.size[1];
        MatShape outTailShape_(outTailShape), outResShape;

        if (!outTailShape_.empty())
            CV_Assert(total(outTailShape_) == _numOut);
        else
            outTailShape_.assign(1, _numOut);

        int _numSamples;
        if (useTimestampDim)
        {
            CV_Assert(inp0.size() >= 2 && total(inp0, 2) == _numInp);
            _numSamples = inp0[1];
            outResShape.push_back(inp0[0]);  // keep the time axis in the output
        }
        else
        {
            CV_Assert(inp0.size() >= 2 && total(inp0, 1) == _numInp);
            _numSamples = inp0[0];
        }

        outResShape.push_back(_numSamples);
        outResShape.insert(outResShape.end(), outTailShape_.begin(), outTailShape_.end());

        size_t noutputs = produceCellOutput ? 2 : 1;
        outputs.assign(noutputs, outResShape);

        internals.assign(1, shape(_numSamples, _numOut));  // hInternal
        internals.push_back(shape(_numSamples, _numOut));  // cInternal
        internals.push_back(shape(_numSamples, 1));        // dummyOnes
        internals.push_back(shape(_numSamples, 4*_numOut));// gates

        return false;
    }

    // Caches time/sample counts and the per-timestamp output shape.
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> input;
        inputs_arr.getMatVector(input);

        CV_Assert((!usePeephole && blobs.size() == 3) || (usePeephole && blobs.size() == 6));
        CV_Assert(input.size() == 1);
        const Mat& inp0 = input[0];

        Mat &Wh = blobs[0], &Wx = blobs[1];
        int numOut = Wh.size[1];
        int numInp = Wx.size[1];

        if (!outTailShape.empty())
            CV_Assert(total(outTailShape) == numOut);
        else
            outTailShape.assign(1, numOut);

        if (useTimestampDim)
        {
            CV_Assert(inp0.dims >= 2 && (int)inp0.total(2) == numInp);
            numTimeStamps = inp0.size[0];
            numSamples = inp0.size[1];
        }
        else
        {
            CV_Assert(inp0.dims >= 2 && (int)inp0.total(1) == numInp);
            numTimeStamps = 1;
            numSamples = inp0.size[0];
        }

        outTsShape.clear();
        outTsShape.push_back(numSamples);
        outTsShape.insert(outTsShape.end(), outTailShape.begin(), outTailShape.end());

        allocated = true;
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> input, output, internals;
        inputs_arr.getMatVector(input);
        outputs_arr.getMatVector(output);
        internals_arr.getMatVector(internals);

        const Mat &Wh = blobs[0];
        const Mat &Wx = blobs[1];
        const Mat &bias = blobs[2];

        int numOut = Wh.size[1];

        Mat hInternal = internals[0], cInternal = internals[1],
            dummyOnes = internals[2], gates = internals[3];
        // Initial hidden and cell states are zero; dummyOnes broadcasts bias.
        hInternal.setTo(0.);
        cInternal.setTo(0.);
        dummyOnes.setTo(1.);

        int numSamplesTotal = numTimeStamps*numSamples;
        Mat xTs = input[0].reshape(1, numSamplesTotal);

        Mat hOutTs = output[0].reshape(1, numSamplesTotal);
        Mat cOutTs = produceCellOutput ? output[1].reshape(1, numSamplesTotal) : Mat();

        // Iterate over timestamps, optionally in reverse (backward LSTM half).
        int tsStart, tsEnd, tsInc;
        if (reverse) {
            tsStart = numTimeStamps - 1;
            tsEnd = -1;
            tsInc = -1;
        }
        else {
            tsStart = 0;
            tsEnd = numTimeStamps;
            tsInc = 1;
        }
        for (int ts = tsStart; ts != tsEnd; ts += tsInc)
        {
            Range curRowRange(ts*numSamples, (ts + 1)*numSamples);
            Mat xCurr = xTs.rowRange(curRowRange);

            gemm(xCurr, Wx, 1, gates, 0, gates, GEMM_2_T);      // Wx * x_t
            gemm(hInternal, Wh, 1, gates, 1, gates, GEMM_2_T);  //+Wh * h_{t-1}
            gemm(dummyOnes, bias, 1, gates, 1, gates);          //+b

            // Gate column blocks in (input, forget, output, candidate) order.
            Mat gateI = gates.colRange(0*numOut, 1*numOut);
            Mat gateF = gates.colRange(1*numOut, 2*numOut);
            Mat gateO = gates.colRange(2*numOut, 3*numOut);
            Mat gateG = gates.colRange(3*numOut, 4*numOut);

            if (forgetBias)
                add(gateF, forgetBias, gateF);

            if (usePeephole)
            {
                // Peephole connections feed c_{t-1} into the i and f gates;
                // the o gate gets c_t later, so it is sigmoided separately.
                Mat gatesIF = gates.colRange(0, 2*numOut);
                gemm(cInternal, blobs[3], 1, gateI, 1, gateI);
                gemm(cInternal, blobs[4], 1, gateF, 1, gateF);
                sigmoid(gatesIF, gatesIF);
            }
            else
            {
                Mat gatesIFO = gates.colRange(0, 3*numOut);
                sigmoid(gatesIFO, gatesIFO);
            }

            tanh(gateG, gateG);

            //compute c_t
            multiply(gateF, cInternal, gateF);  // f_t (*) c_{t-1}
            multiply(gateI, gateG, gateI);      // i_t (*) g_t
            add(gateF, gateI, cInternal);       // c_t = f_t (*) c_{t-1} + i_t (*) g_t

            if (useCellClip)
            {
                min(cInternal, cellClip, cInternal);
                max(cInternal, -cellClip, cInternal);
            }

            if (usePeephole)
            {
                gemm(cInternal, blobs[5], 1, gateO, 1, gateO);
                sigmoid(gateO, gateO);
            }

            //compute h_t
            tanh(cInternal, hInternal);
            multiply(gateO, hInternal, hInternal);

            //save results in output blobs
            hInternal.copyTo(hOutTs.rowRange(curRowRange));
            if (produceCellOutput)
                cInternal.copyTo(cOutTs.rowRange(curRowRange));
        }
    }
};
// Public factory: wraps the concrete implementation behind the abstract type.
Ptr<LSTMLayer> LSTMLayer::create(const LayerParams& params)
{
    Ptr<LSTMLayer> layer(new LSTMLayerImpl(params));
    return layer;
}
// Maps the (case-insensitive) input name "x" to index 0; -1 otherwise.
int LSTMLayer::inputNameToIndex(String inputName)
{
    return (toLowerCase(inputName) == "x") ? 0 : -1;
}
int LSTMLayer::outputNameToIndex(const String& outputName)
{
if (toLowerCase(outputName) == "h")
return 0;
else if (toLowerCase(outputName) == "c")
return 1;
return -1;
}
// CPU implementation of a vanilla (Elman-style) RNN with tanh activations:
//   h_t = tanh(W_hh * h_{t-1} + W_xh * x_t + b_h)
//   o_t = tanh(W_ho * h_t + b_o)
// Weights: blobs[0]=W_xh, blobs[1]=b_h, blobs[2]=W_hh, blobs[3]=W_ho,
// blobs[4]=b_o (see setWeights / finalize).
class RNNLayerImpl : public RNNLayer
{
    int numX, numH, numO;  // input, hidden and output feature sizes
    int numSamples, numTimestamps, numSamplesTotal;
    int dtype;
    Mat Whh, Wxh, bh;  // hidden-state update weights
    Mat Who, bo;       // output projection weights
    bool produceH;     // also emit hidden states as a second output

public:
    RNNLayerImpl(const LayerParams& params)
        : numX(0), numH(0), numO(0), numSamples(0), numTimestamps(0), numSamplesTotal(0), dtype(0)
    {
        setParamsFrom(params);
        type = "RNN";
        produceH = false;
    }

    void setProduceHiddenOutput(bool produce = false) CV_OVERRIDE
    {
        produceH = produce;
    }

    // Installs weight blobs after validating their mutual shape constraints.
    void setWeights(const Mat &W_xh, const Mat &b_h, const Mat &W_hh, const Mat &W_ho, const Mat &b_o) CV_OVERRIDE
    {
        CV_Assert(W_hh.dims == 2 && W_xh.dims == 2);
        CV_Assert(W_hh.size[0] == W_xh.size[0] && W_hh.size[0] == W_hh.size[1] && (int)b_h.total() == W_xh.size[0]);
        CV_Assert(W_ho.size[0] == (int)b_o.total());
        CV_Assert(W_ho.size[1] == W_hh.size[1]);

        blobs.resize(5);
        blobs[0] = Mat(W_xh.clone());
        blobs[1] = Mat(b_h.clone());
        blobs[2] = Mat(W_hh.clone());
        blobs[3] = Mat(W_ho.clone());
        blobs[4] = Mat(b_o.clone());
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() >= 1 && inputs.size() <= 2);

        Mat Who_ = blobs[3];
        Mat Wxh_ = blobs[0];

        int numTimestamps_ = inputs[0][0];
        int numSamples_ = inputs[0][1];

        int numO_ = Who_.rows;
        int numH_ = Wxh_.rows;

        outputs.clear();
        int dims[] = {numTimestamps_, numSamples_, numO_};
        outputs.push_back(shape(dims, 3));
        dims[2] = numH_;
        if (produceH)
            outputs.push_back(shape(dims, 3));

        internals.assign(2, shape(numSamples_, numH_));  // hCurr, hPrev
        internals.push_back(shape(numSamples_, 1));      // bias broadcast ones

        return false;
    }

    // Caches weight views and input geometry; reshapes biases to 1-row form.
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> input, outputs;
        inputs_arr.getMatVector(input);
        CV_Assert(input.size() >= 1 && input.size() <= 2);

        Wxh = blobs[0];
        bh  = blobs[1];
        Whh = blobs[2];
        Who = blobs[3];
        bo  = blobs[4];

        numH = Wxh.rows;
        numX = Wxh.cols;
        numO = Who.rows;

        const Mat& inp0 = input[0];

        CV_Assert(inp0.dims >= 2);
        CV_Assert(inp0.total(2) == numX);
        dtype = CV_32F;
        CV_Assert(inp0.type() == dtype);
        numTimestamps = inp0.size[0];
        numSamples = inp0.size[1];
        numSamplesTotal = numTimestamps * numSamples;

        bh = bh.reshape(1, 1);  //is 1 x numH Mat
        bo = bo.reshape(1, 1);  //is 1 x numO Mat
    }

    // (Re)allocates output blobs to the expected 3D shapes.
    void reshapeOutput(std::vector<Mat> &output)
    {
        output.resize(produceH ? 2 : 1);
        int sz0[] = { numTimestamps, numSamples, numO };
        output[0].create(3, sz0, dtype);
        if (produceH)
        {
            int sz1[] = { numTimestamps, numSamples, numH };
            output[1].create(3, sz1, dtype);
        }
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> input, output, internals;
        inputs_arr.getMatVector(input);
        outputs_arr.getMatVector(output);
        internals_arr.getMatVector(internals);

        // Flatten the time axis so each timestamp is a contiguous row block.
        Mat xTs = input[0].reshape(1, numSamplesTotal);
        Mat oTs = output[0].reshape(1, numSamplesTotal);
        Mat hTs = produceH ? output[1].reshape(1, numSamplesTotal) : Mat();
        Mat hCurr = internals[0];
        Mat hPrev = internals[1];
        Mat dummyBiasOnes = internals[2];

        hPrev.setTo(0.);         // zero initial hidden state
        dummyBiasOnes.setTo(1.); // broadcasts the 1-row biases via gemm

        for (int ts = 0; ts < numTimestamps; ts++)
        {
            Range curRowRange = Range(ts * numSamples, (ts + 1) * numSamples);
            Mat xCurr = xTs.rowRange(curRowRange);

            gemm(hPrev, Whh, 1, hCurr, 0, hCurr, GEMM_2_T);  // W_{hh} * h_{prev}
            gemm(xCurr, Wxh, 1, hCurr, 1, hCurr, GEMM_2_T);  //+W_{xh} * x_{curr}
            gemm(dummyBiasOnes, bh, 1, hCurr, 1, hCurr);     //+bh
            tanh(hCurr, hPrev);                              // h_t, reused as next hPrev

            Mat oCurr = oTs.rowRange(curRowRange);
            gemm(hPrev, Who, 1, oCurr, 0, oCurr, GEMM_2_T);  // W_{ho} * h_{prev}
            gemm(dummyBiasOnes, bo, 1, oCurr, 1, oCurr);     //+b_o
            tanh(oCurr, oCurr);

            if (produceH)
                hPrev.copyTo(hTs.rowRange(curRowRange));
        }
    }
};
// Public factory: wraps the concrete implementation behind the abstract type.
CV_EXPORTS_W Ptr<RNNLayer> RNNLayer::create(const LayerParams& params)
{
    Ptr<RNNLayer> layer(new RNNLayerImpl(params));
    return layer;
}
}
}

View File

@@ -0,0 +1,422 @@
/*M ///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "../op_cuda.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include "../nms.inl.hpp"
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/region.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Region layer: final decoding stage of YOLO-style detectors. Takes the raw
// network output laid out as B x H x W x (anchors*(coords+1+classes)) — see
// the assert in getMemoryShapes — applies a logistic squash to the objectness
// score, softmax (YOLO v2) or per-class logistic (YOLO v3) to the class
// scores, decodes anchor-relative box geometry, and optionally runs
// per-class NMS on the decoded detections.
class RegionLayerImpl CV_FINAL : public RegionLayer
{
public:
    int coords, classes, anchors, classfix;
    float thresh, nmsThreshold;
    bool useSoftmax, useLogistic;
#ifdef HAVE_OPENCL
    UMat blob_umat;  // device-side cache of the anchor biases (blobs[0])
#endif

    RegionLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        CV_Assert(blobs.size() == 1);  // blobs[0] holds the anchor biases

        thresh = params.get<float>("thresh", 0.2);
        coords = params.get<int>("coords", 4);
        classes = params.get<int>("classes", 0);
        anchors = params.get<int>("anchors", 5);
        classfix = params.get<int>("classfix", 0);
        useSoftmax = params.get<bool>("softmax", false);
        useLogistic = params.get<bool>("logistic", false);
        nmsThreshold = params.get<float>("nms_threshold", 0.4);

        CV_Assert(nmsThreshold >= 0.);
        CV_Assert(coords == 4);   // only upright 4-coordinate boxes are supported
        CV_Assert(classes >= 1);
        CV_Assert(anchors >= 1);
        // at least one squash mode must be requested (the CUDA path below
        // additionally requires exactly one)
        CV_Assert(useLogistic || useSoftmax);
        if (params.get<bool>("softmax_tree", false))
            CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA;
    }

    // Output is a flat list of detections: one row of inputs[0][3]/anchors
    // values per grid cell and anchor. A leading batch dimension is kept
    // only when batch_size > 1.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() > 0);
        // channels == cell_size*anchors
        CV_Assert(inputs[0][3] == (1 + coords + classes)*anchors);
        int batch_size = inputs[0][0];
        if(batch_size > 1)
            outputs = std::vector<MatShape>(1, shape(batch_size, inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
        else
            outputs = std::vector<MatShape>(1, shape(inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
        return false;
    }

    // Sigmoid; used for objectness, box offsets and (YOLO v3) class scores.
    float logistic_activate(float x) { return 1.F / (1.F + exp(-x)); }

    // Numerically stable softmax over n scores (max-subtraction trick),
    // with temperature `temp`.
    void softmax_activate(const float* input, const int n, const float temp, float* output)
    {
        int i;
        float sum = 0;
        float largest = -FLT_MAX;
        for (i = 0; i < n; ++i) {
            if (input[i] > largest) largest = input[i];
        }
        for (i = 0; i < n; ++i) {
            float e = exp((input[i] - largest) / temp);
            sum += e;
            output[i] = e;
        }
        for (i = 0; i < n; ++i) {
            output[i] /= sum;
        }
    }

#ifdef HAVE_OPENCL
    // OpenCL path: logistic + softmax kernels on the device, then NMS on the
    // host. Bails out (returns false) for the YOLO v3 logistic mode and for
    // fp16 inputs so the caller falls back to the CPU path.
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        if (blob_umat.empty())
            blobs[0].copyTo(blob_umat);

        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        // TODO: implement a logistic activation to classification scores.
        if (useLogistic || inps.depth() == CV_16S)
            return false;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        CV_Assert(inputs.size() >= 1);
        int const cell_size = classes + coords + 1;

        for (size_t ii = 0; ii < outputs.size(); ii++)
        {
            UMat& inpBlob = inputs[ii];
            UMat& outBlob = outputs[ii];

            int batch_size = inpBlob.size[0];
            int rows = inpBlob.size[1];
            int cols = inpBlob.size[2];

            // channels == cell_size*anchors, see l. 94
            int sample_size = cell_size*rows*cols*anchors;

            // objectness squash for every (cell, anchor)
            ocl::Kernel logistic_kernel("logistic_activ", ocl::dnn::region_oclsrc);
            size_t nanchors = rows*cols*anchors*batch_size;
            logistic_kernel.set(0, (int)nanchors);
            logistic_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
            logistic_kernel.set(2, (int)cell_size);
            logistic_kernel.set(3, ocl::KernelArg::PtrWriteOnly(outBlob));
            logistic_kernel.run(1, &nanchors, NULL, false);

            if (useSoftmax)
            {
                // Yolo v2
                // softmax activation for Probability, for each grid cell (X x Y x Anchor-index)
                ocl::Kernel softmax_kernel("softmax_activ", ocl::dnn::region_oclsrc);
                size_t nanchors = rows*cols*anchors*batch_size;
                softmax_kernel.set(0, (int)nanchors);
                softmax_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
                softmax_kernel.set(2, ocl::KernelArg::PtrReadOnly(blob_umat));
                softmax_kernel.set(3, (int)cell_size);
                softmax_kernel.set(4, (int)classes);
                softmax_kernel.set(5, (int)classfix);
                softmax_kernel.set(6, (int)rows);
                softmax_kernel.set(7, (int)cols);
                softmax_kernel.set(8, (int)anchors);
                softmax_kernel.set(9, (float)thresh);
                softmax_kernel.set(10, ocl::KernelArg::PtrWriteOnly(outBlob));
                if (!softmax_kernel.run(1, &nanchors, NULL, false))
                    return false;
            }

            if (nmsThreshold > 0) {
                // NMS runs on the host over the mapped output buffer
                Mat mat = outBlob.getMat(ACCESS_WRITE);
                float *dstData = mat.ptr<float>();
                for (int b = 0; b < batch_size; ++b)
                    do_nms_sort(dstData + b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
            }
        }
        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            // fp16 inputs: use the generic convert-and-retry fallback
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs, internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        CV_Assert(inputs.size() >= 1);
        CV_Assert(outputs.size() == 1);
        int const cell_size = classes + coords + 1;
        const float* biasData = blobs[0].ptr<float>();

        for (size_t ii = 0; ii < outputs.size(); ii++)
        {
            Mat &inpBlob = inputs[ii];
            Mat &outBlob = outputs[ii];

            int batch_size = inpBlob.size[0];
            int rows = inpBlob.size[1];
            int cols = inpBlob.size[2];

            // address length for one image in batch, both for input and output
            int sample_size = cell_size*rows*cols*anchors;

            // assert that the comment above is true
            CV_Assert(sample_size*batch_size == inpBlob.total());
            CV_Assert(sample_size*batch_size == outBlob.total());

            CV_Assert(inputs.size() < 2 || inputs[1].dims == 4);
            // normalization reference for box sizes; an optional second input
            // supplies it, otherwise this layer's own grid size is used
            int hNorm = inputs.size() > 1 ? inputs[1].size[2] : rows;
            int wNorm = inputs.size() > 1 ? inputs[1].size[3] : cols;

            const float *srcData = inpBlob.ptr<float>();
            float *dstData = outBlob.ptr<float>();

            // logistic activation for t0, for each grid cell (X x Y x Anchor-index)
            for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
                int index = cell_size*i;
                float x = srcData[index + 4];
                dstData[index + 4] = logistic_activate(x); // logistic activation
            }

            if (useSoftmax) {  // Yolo v2
                // softmax over the class scores of each (cell, anchor)
                for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
                    int index = cell_size*i;
                    softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
                }
            }
            else if (useLogistic) {  // Yolo v3
                // independent sigmoid per class score
                for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
                    int index = cell_size*i;
                    const float* input = srcData + index + 5;
                    float* output = dstData + index + 5;
                    for (int c = 0; c < classes; ++c)
                        output[c] = logistic_activate(input[c]);
                }
            }

            // decode box geometry and gate class scores by objectness
            for (int b = 0; b < batch_size; ++b)
                for (int x = 0; x < cols; ++x)
                    for(int y = 0; y < rows; ++y)
                        for (int a = 0; a < anchors; ++a) {
                            // relative start address for image b within the batch data
                            int index_sample_offset = sample_size*b;
                            int index = (y*cols + x)*anchors + a; // index for each grid-cell & anchor
                            int p_index = index_sample_offset + index * cell_size + 4;
                            float scale = dstData[p_index];
                            if (classfix == -1 && scale < .5) scale = 0; // if(t0 < 0.5) t0 = 0;
                            int box_index = index_sample_offset + index * cell_size;

                            // center in [0,1] grid-relative coords; size scaled by the anchor bias
                            dstData[box_index + 0] = (x + logistic_activate(srcData[box_index + 0])) / cols;
                            dstData[box_index + 1] = (y + logistic_activate(srcData[box_index + 1])) / rows;
                            dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
                            dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;

                            int class_index = index_sample_offset + index * cell_size + 5;
                            for (int j = 0; j < classes; ++j) {
                                float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
                                dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
                            }
                        }

            if (nmsThreshold > 0) {
                for (int b = 0; b < batch_size; ++b){
                    do_nms_sort(dstData+b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
                }
            }
        }
    }

    // Per-class NMS over `total` decoded detections stored in-place in
    // `detections`; suppressed class scores are zeroed, surviving ones
    // are written back.
    void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh)
    {
        std::vector<Rect2d> boxes(total);
        std::vector<float> scores(total);

        // convert center/size boxes to top-left-corner rectangles for NMSBoxes
        for (int i = 0; i < total; ++i)
        {
            Rect2d &b = boxes[i];
            int box_index = i * (classes + coords + 1);
            b.width = detections[box_index + 2];
            b.height = detections[box_index + 3];
            b.x = detections[box_index + 0] - b.width / 2;
            b.y = detections[box_index + 1] - b.height / 2;
        }

        std::vector<int> indices;
        for (int k = 0; k < classes; ++k)
        {
            // extract the scores of class k and clear them in the output
            for (int i = 0; i < total; ++i)
            {
                int box_index = i * (classes + coords + 1);
                int class_index = box_index + 5;
                scores[i] = detections[class_index + k];
                detections[class_index + k] = 0;
            }
            NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices);
            // restore only the scores of the boxes NMS kept
            for (int i = 0, n = indices.size(); i < n; ++i)
            {
                int box_index = indices[i] * (classes + coords + 1);
                int class_index = box_index + 5;
                detections[class_index + k] = scores[indices[i]];
            }
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        if (coords != 4)
            CV_Error(Error::StsNotImplemented, "Only upright rectangular boxes are supported in RegionLayer.");

        // normalization reference: own grid, or the optional second input
        std::size_t height_norm, width_norm;
        if (inputs.size() == 1)
        {
            auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
            auto input_shape = input_wrapper->getShape();
            height_norm = input_shape[1];
            width_norm = input_shape[2];
        }
        else
        {
            auto input_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
            auto input_shape = input_wrapper->getShape();
            CV_Assert(input_shape.size() == 4);
            height_norm = input_shape[2];
            width_norm = input_shape[3];
        }

        cuda4dnn::SquashMethod squash_method;
        if(useLogistic)
            squash_method = cuda4dnn::SquashMethod::SIGMOID;
        else if (useSoftmax)
            squash_method = cuda4dnn::SquashMethod::SOFTMAX;

        /* exactly one must be true */
        CV_Assert((useLogistic || useSoftmax) && !(useLogistic && useSoftmax));

        cuda4dnn::RegionConfiguration<float> config;
        config.squash_method = squash_method;
        config.classes = classes;
        config.boxes_per_cell = anchors;

        config.height_norm = height_norm;
        config.width_norm = width_norm;

        // mirrors the `classfix == -1 && scale < .5` gating of the CPU path
        config.object_prob_cutoff = (classfix == -1) ? 0.5 : 0.0;
        config.class_prob_cutoff = thresh;

        config.nms_iou_threshold = nmsThreshold;

        return make_cuda_node<cuda4dnn::RegionOp>(preferableTarget, std::move(context->stream), blobs[0], config);
    }
#endif

    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
        CV_UNUSED(outputs); // suppress unused variable warning
        int64 flops = 0;
        // rough estimate: ~60 ops per input element
        for(int i = 0; i < inputs.size(); i++)
        {
            flops += 60*total(inputs[i]);
        }
        return flops;
    }
};
// Factory: builds the default implementation of the YOLO region layer.
Ptr<RegionLayer> RegionLayer::create(const LayerParams& params)
{
    Ptr<RegionLayer> layer(new RegionLayerImpl(params));
    return layer;
}
} // namespace dnn
} // namespace cv

View File

@@ -0,0 +1,251 @@
/*M ///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#ifdef HAVE_DNN_NGRAPH
#include "../ie_ngraph.hpp"
#include <ngraph/op/experimental/layers/reorg_yolo.hpp>
#endif
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/dnn/all_layers.hpp>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/reorg.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Reorg layer (YOLO): moves spatial blocks of reorgStride x reorgStride
// pixels into the channel dimension, turning an N x C x H x W input into
// N x (C*s*s) x (H/s) x (W/s). The data movement is delegated to an
// internal PermuteLayer operating on reshaped views of the tensors.
class ReorgLayerImpl CV_FINAL : public ReorgLayer
{
    int reorgStride;  // spatial block size `s`
public:
    ReorgLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);

        reorgStride = params.get<int>("reorg_stride", 2);
        CV_Assert(reorgStride > 0);
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() > 0);
        outputs = std::vector<MatShape>(inputs.size(), shape(
            inputs[0][0],
            inputs[0][1] * reorgStride * reorgStride,
            inputs[0][2] / reorgStride,
            inputs[0][3] / reorgStride));
        // reject inputs whose spatial size is not divisible by the stride
        CV_Assert(outputs[0][0] > 0 && outputs[0][1] > 0 && outputs[0][2] > 0 && outputs[0][3] > 0);
        CV_Assert(total(outputs[0]) == total(inputs[0]));
        return false;
    }

    // Precompute the reshape geometry and configure the internal permute
    // layer that performs the actual reordering.
    virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        Mat inp = inputs[0];
        Mat out = outputs[0];
        int batchSize = inp.size[0];

        LayerParams permParams;
        if (batchSize == 1)
        {
            // 4-D view: [(C*H)/(s*s), s, W, s] permuted by this order
            int order[] = {1, 3, 0, 2};
            permParams.set("order", DictValue::arrayInt(&order[0], 4));

            permuteInpShape.resize(4);
            permuteInpShape[0] = inp.size[1] * inp.size[2] / (reorgStride * reorgStride); // (channels*height)/(r*r)
            permuteInpShape[1] = reorgStride;
            permuteInpShape[2] = inp.size[3]; // width
            permuteInpShape[3] = reorgStride;

            permuteOutShape.resize(4);
            for (int i = 0; i < 4; ++i)
                permuteOutShape[i] = permuteInpShape[order[i]];
        }
        else
        {
            // with a batch dimension a 5-D permutation is needed
            int order[] = {0, 2, 4, 1, 3};
            permParams.set("order", DictValue::arrayInt(&order[0], 5));

            permuteInpShape.resize(5);
            permuteInpShape[0] = batchSize;
            permuteInpShape[1] = inp.size[1] * inp.size[2] / (reorgStride * reorgStride); // (channels*height)/(r*r)
            permuteInpShape[2] = reorgStride;
            permuteInpShape[3] = inp.size[3]; // width
            permuteInpShape[4] = reorgStride;

            permuteOutShape.resize(5);
            for (int i = 0; i < 5; ++i)
                permuteOutShape[i] = permuteInpShape[order[i]];
        }
        permute = PermuteLayer::create(permParams);
        std::vector<Mat> permuteInputs(1, inp.reshape(1, permuteInpShape));
        std::vector<Mat> permuteOutputs(1, out.reshape(1, permuteOutShape));
        permute->finalize(permuteInputs, permuteOutputs);
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
    }

#ifdef HAVE_OPENCL
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        // run the permutation configured in finalize() on reshaped views
        inputs[0] = inputs[0].reshape(1, permuteInpShape.size(), &permuteInpShape[0]);
        outputs[0] = outputs[0].reshape(1, permuteOutShape.size(), &permuteOutShape[0]);
        permute->preferableTarget = preferableTarget;
        permute->forward(inputs, outputs, internals);
        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            // fp16 inputs: use the generic convert-and-retry fallback
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        // the permutation configured in finalize() does all the work
        inputs[0] = inputs[0].reshape(1, permuteInpShape);
        outputs[0] = outputs[0].reshape(1, permuteOutShape);
        permute->forward(inputs, outputs, internals_arr);
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        return make_cuda_node<cuda4dnn::ReorgOp>(preferableTarget, std::move(context->stream), reorgStride);
    }
#endif

#ifdef HAVE_INF_ENGINE
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        InferenceEngine::Builder::ReorgYoloLayer ieLayer(name);
        ieLayer.setStride(reorgStride);
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        auto reorg = std::make_shared<ngraph::op::ReorgYolo>(ieInpNode, ngraph::Strides{(size_t)reorgStride});
        return Ptr<BackendNode>(new InfEngineNgraphNode(reorg));
    }
#endif  // HAVE_DNN_NGRAPH

    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
        CV_UNUSED(outputs); // suppress unused variable warning
        int64 flops = 0;
        // rough estimate: ~21 ops per input element
        for(int i = 0; i < inputs.size(); i++)
        {
            flops += 21*total(inputs[i]);
        }
        return flops;
    }

private:
    Ptr<PermuteLayer> permute;                          // performs the actual reordering
    std::vector<int> permuteInpShape, permuteOutShape;  // reshape geometry set in finalize()
};
// Factory: builds the permute-based reorg layer implementation.
Ptr<ReorgLayer> ReorgLayer::create(const LayerParams& params)
{
    Ptr<ReorgLayer> layer(new ReorgLayerImpl(params));
    return layer;
}
} // namespace dnn
} // namespace cv

View File

@@ -0,0 +1,318 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/reshape.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Computes the destination shape produced by applying a Caffe-style reshape
// mask `maskShape` to `srcShape` over the axis sub-range `srcRange`.
// Mask entry semantics:
//   > 0   explicit output dimension;
//   == 0  copy the corresponding source dimension;
//   == -1 infer this dimension from the total element count (at most one).
// When every mask value is positive, the source sub-range that the mask
// replaces is relocated by scanning from the end of `srcRange` until the
// product of source dims equals the mask's total.
static void computeShapeByReshapeMask(const MatShape &srcShape,
                                      const MatShape &maskShape,
                                      Range srcRange /*= Range::all()*/,
                                      MatShape& dstShape)
{
    int srcShapeSize = (int)srcShape.size();
    int maskShapeSize = (int)maskShape.size();

    if (srcRange == Range::all())
        srcRange = Range(0, srcShapeSize);
    else
    {
        // clamp the start into the shape and resolve an open-ended range
        int sz = srcRange.size();
        srcRange.start = clamp(srcRange.start, srcShapeSize);
        srcRange.end = srcRange.end == INT_MAX ? srcShapeSize : srcRange.start + sz;
    }

    bool explicitMask = !maskShape.empty();  // All mask values are positive.
    for (int i = 0, n = maskShape.size(); i < n && explicitMask; ++i)
    {
        explicitMask = maskShape[i] > 0;
    }
    // Working range of source shape is a range where area(src) == area(mask).
    if (explicitMask)
    {
        int maskTotal = total(maskShape);
        // Go from the end of mask until we collect required total.
        bool matched = false;
        for (int i = srcRange.end - 1; i >= srcRange.start; --i)
        {
            if (matched)
            {
                // extend the matched range leftwards while the total is preserved
                if (total(srcShape, i, srcRange.end) != maskTotal)
                {
                    srcRange.start = i + 1;
                    break;
                }
                else if (i == 0)
                {
                    srcRange.start = 0;
                    break;
                }
            }
            else
            {
                matched = total(srcShape, i, srcRange.end) == maskTotal;
            }
        }
        // absorb any leading unit dims still needed to reach maskTotal
        while (total(srcShape, srcRange.start, srcRange.end) != maskTotal && srcRange.start > 0)
        {
            srcRange.start -= 1;
        }
        CV_Assert(total(srcShape, srcRange.start, srcRange.end) == maskTotal);
    }

    CV_Assert(0 <= srcRange.start && srcRange.start <= srcRange.end && srcRange.end <= srcShapeSize);
    int dstShapeSize = srcShapeSize - srcRange.size() + maskShapeSize;
    dstShape.resize(dstShapeSize);

    // copy the untouched leading and trailing source dimensions
    std::copy(srcShape.begin(), srcShape.begin() + srcRange.start, dstShape.begin());
    std::copy(srcShape.begin() + srcRange.end, srcShape.begin() + srcShapeSize, dstShape.begin() + srcRange.start + maskShapeSize);

    int inferDim = -1;  // index of the single -1 (inferred) dim, if any
    for (int i = 0; i < maskShapeSize; i++)
    {
        if (maskShape[i] > 0)
        {
            dstShape[srcRange.start + i] = maskShape[i];
        }
        else if (maskShape[i] == 0)
        {
            if (srcRange.start + i >= srcShapeSize)
                CV_Error(Error::StsBadArg, format("Copy dim[%d] (which has zero size) is out of the source shape bounds", srcRange.start + i));
            dstShape[srcRange.start + i] = srcShape[srcRange.start + i];
        }
        else if (maskShape[i] == -1)
        {
            if (inferDim != -1)
                CV_Error(Error::StsAssert, "Duplicate of inferred dim (which is denoted by -1)");
            inferDim = srcRange.start + i;
            dstShape[inferDim] = 1;  // placeholder so dstTotal below is well-defined
        }
        else
            CV_Error(Error::StsBadArg, "maskShape[i] >= -1");
    }

    size_t srcTotal = total(srcShape);
    size_t dstTotal = total(dstShape);
    CV_Assert(dstTotal != 0);

    if (inferDim != -1)
    {
        // fill the inferred dim so that element counts match exactly
        if (srcTotal % dstTotal != 0)
            CV_Error(Error::StsBackTrace, "Can't infer a dim denoted by -1");

        dstShape[inferDim] = (int)(srcTotal / dstTotal);
    }
    else
    {
        CV_Assert(srcTotal == dstTotal);
    }
}
// Reshape layer: changes tensor dimensions without moving data when the
// output buffer aliases the input; otherwise copies the reshaped view.
// The target shape comes either from the "dim"/"axis"/"num_axes" params
// (resolved by computeShapeByReshapeMask) or from a second input blob.
class ReshapeLayerImpl CV_FINAL : public ReshapeLayer
{
public:
    ReshapeLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        int axis = params.get<int>("axis", 0);
        int numAxes = params.get<int>("num_axes", -1);
        CV_Assert(numAxes >= -1);
        // source-axis range the "dim" mask applies to; num_axes == -1 means
        // "to the end" (resolved later against the actual rank)
        newShapeRange = (numAxes == -1) ? Range(axis, INT_MAX) : Range(axis, axis + numAxes);

        newShapeDesc.clear();
        if (params.has("dim"))
        {
            const DictValue &paramShape = params.get("dim");
            int i, dims = paramShape.size();
            newShapeDesc.resize(dims);
            for (i = 0; i < dims; i++)
                newShapeDesc[i] = paramShape.get<int>(i);
        }
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine());
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        if (inputs.size() == 1 || inputs.size() == requiredOutputs)
        {
            outputs.clear();
            for (size_t i = 0; i < inputs.size(); i++)
            {
                outputs.push_back(MatShape());
                computeShapeByReshapeMask(inputs[i], newShapeDesc, newShapeRange, outputs.back());
            }
        }
        else
        {
            // the second input's shape is itself the target shape
            CV_Assert_N(inputs.size() == 2, total(inputs[0]) == total(inputs[1]));
            outputs.assign(1, inputs[1]);
        }
        return true;
    }

    void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        std::vector<Mat> outputs;
        outputs_arr.getMatVector(outputs);
        CV_Assert(!outputs.empty());
        // cache the resolved output shapes for forward() and the backends
        outShapes.resize(outputs.size());
        for (int i = 0; i < outputs.size(); ++i)
            outShapes[i] = shape(outputs[i]);
    }

    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        for (size_t i = 0; i < outputs.size(); i++)
        {
            UMat srcBlob = inputs[i];
            void *src_handle = inputs[i].handle(ACCESS_READ);
            void *dst_handle = outputs[i].handle(ACCESS_WRITE);
            // copy only when input and output do not share the same buffer
            if (src_handle != dst_handle)
            {
                UMat umat = srcBlob.reshape(1, (int)outShapes[i].size(), &outShapes[i][0]);
                umat.copyTo(outputs[i]);
            }
        }
        outs.assign(outputs);
        return true;
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        for (size_t i = 0; i < outputs.size(); i++)
        {
            Mat srcBlob = inputs[i];
            // aliasing buffers make the reshape a no-op; otherwise copy the view
            if (outputs[i].data != srcBlob.data)
                srcBlob.reshape(1, shape(outputs[i])).copyTo(outputs[i]);
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
    }
#endif

#ifdef HAVE_INF_ENGINE
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        InferenceEngine::Builder::ReshapeLayer ieLayer(name);
        CV_Assert(outShapes.size() == 1);
        ieLayer.setDims(outShapes[0]);
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        CV_Assert(outShapes.size() == 1);
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        std::vector<int64_t> out(outShapes[0].begin(), outShapes[0].end());
        auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
                                                            ngraph::Shape{out.size()}, out.data());
        auto reshape = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, shape, true);
        return Ptr<BackendNode>(new InfEngineNgraphNode(reshape));
    }
#endif  // HAVE_DNN_NGRAPH

private:
    std::vector<MatShape> outShapes;  // resolved output shapes, set in finalize()
};
// Factory: builds the default reshape layer implementation.
Ptr<ReshapeLayer> ReshapeLayer::create(const LayerParams& params)
{
    Ptr<ReshapeLayer> layer(new ReshapeLayerImpl(params));
    return layer;
}
}
}

View File

@@ -0,0 +1,361 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include <opencv2/imgproc.hpp>
#ifdef HAVE_DNN_NGRAPH
#include "../ie_ngraph.hpp"
#include <ngraph/op/experimental/layers/interpolate.hpp>
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/resize.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv { namespace dnn {
class ResizeLayerImpl : public ResizeLayer
{
public:
ResizeLayerImpl(const LayerParams& params) : zoomFactorWidth(0), zoomFactorHeight(0), scaleWidth(0), scaleHeight(0)
{
setParamsFrom(params);
outWidth = params.get<float>("width", 0);
outHeight = params.get<float>("height", 0);
if (params.has("zoom_factor"))
{
CV_Assert(!params.has("zoom_factor_x") && !params.has("zoom_factor_y"));
zoomFactorWidth = zoomFactorHeight = params.get<int>("zoom_factor");
}
else if (params.has("zoom_factor_x") || params.has("zoom_factor_y"))
{
CV_Assert(params.has("zoom_factor_x") && params.has("zoom_factor_y"));
zoomFactorWidth = params.get<int>("zoom_factor_x");
zoomFactorHeight = params.get<int>("zoom_factor_y");
}
interpolation = params.get<String>("interpolation");
CV_Assert(interpolation == "nearest" || interpolation == "bilinear");
alignCorners = params.get<bool>("align_corners", false);
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert_N(inputs.size() == 1, inputs[0].size() == 4);
outputs.resize(1, inputs[0]);
outputs[0][2] = outHeight > 0 ? outHeight : (outputs[0][2] * zoomFactorHeight);
outputs[0][3] = outWidth > 0 ? outWidth : (outputs[0][3] * zoomFactorWidth);
// We can work in-place (do nothing) if input shape == output shape.
return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
if (backendId == DNN_BACKEND_CUDA)
return interpolation == "nearest" || interpolation == "bilinear";
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
return (interpolation == "nearest" && scaleWidth == scaleHeight) ||
(interpolation == "bilinear");
}
#endif
return backendId == DNN_BACKEND_OPENCV;
}
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
if (!outWidth && !outHeight)
{
outHeight = outputs[0].size[2];
outWidth = outputs[0].size[3];
}
if (alignCorners && outHeight > 1)
scaleHeight = static_cast<float>(inputs[0].size[2] - 1) / (outHeight - 1);
else
scaleHeight = static_cast<float>(inputs[0].size[2]) / outHeight;
if (alignCorners && outWidth > 1)
scaleWidth = static_cast<float>(inputs[0].size[3] - 1) / (outWidth - 1);
else
scaleWidth = static_cast<float>(inputs[0].size[3]) / outWidth;
}
// Performs the resize on CPU. Nearest mode delegates to cv::resize per plane;
// bilinear mode is interpolated by hand over all N*C planes at once.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());
    // FP16 blobs are processed by the generic fallback (converts to FP32 and back).
    if (inputs_arr.depth() == CV_16S)
    {
        forward_fallback(inputs_arr, outputs_arr, internals_arr);
        return;
    }
    std::vector<Mat> inputs, outputs, internals;
    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);
    internals_arr.getMatVector(internals);
    // In-place no-op: shapes match, so input data is already the output (see getMemoryShapes).
    if (outHeight == inputs[0].size[2] && outWidth == inputs[0].size[3])
        return;
    Mat& inp = inputs[0];
    Mat& out = outputs[0];
    if (interpolation == "nearest")
    {
        // Resize each (n, ch) spatial plane independently.
        for (size_t n = 0; n < inputs[0].size[0]; ++n)
        {
            for (size_t ch = 0; ch < inputs[0].size[1]; ++ch)
            {
                resize(getPlane(inp, n, ch), getPlane(out, n, ch),
                       Size(outWidth, outHeight), 0, 0, INTER_NEAREST);
            }
        }
    }
    else if (interpolation == "bilinear")
    {
        const int inpHeight = inp.size[2];
        const int inpWidth = inp.size[3];
        const int inpSpatialSize = inpHeight * inpWidth;
        const int outSpatialSize = outHeight * outWidth;
        const int numPlanes = inp.size[0] * inp.size[1];
        // Continuity is required: the inner loop strides across planes by raw
        // pointer arithmetic (inpSpatialSize / outSpatialSize element jumps).
        CV_Assert_N(inp.isContinuous(), out.isContinuous());
        Mat inpPlanes = inp.reshape(1, numPlanes * inpHeight);
        Mat outPlanes = out.reshape(1, numPlanes * outHeight);
        for (int y = 0; y < outHeight; ++y)
        {
            // Source coordinate; no half-pixel offset is applied here.
            float input_y = y * scaleHeight;
            int y0 = static_cast<int>(input_y);
            const float* inpData_row0 = inpPlanes.ptr<float>(y0);
            const float* inpData_row1 = inpPlanes.ptr<float>(std::min(y0 + 1, inpHeight - 1));
            for (int x = 0; x < outWidth; ++x)
            {
                float input_x = x * scaleWidth;
                int x0 = static_cast<int>(input_x);
                int x1 = std::min(x0 + 1, inpWidth - 1);
                float* outData = outPlanes.ptr<float>(y, x);
                const float* inpData_row0_c = inpData_row0;
                const float* inpData_row1_c = inpData_row1;
                // Standard bilinear blend of the 4 neighbours, repeated for every plane.
                for (int c = 0; c < numPlanes; ++c)
                {
                    *outData = inpData_row0_c[x0] +
                        (input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
                        (input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
                        (input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0]));
                    // Advance to the same (y, x) location in the next plane.
                    inpData_row0_c += inpSpatialSize;
                    inpData_row1_c += inpSpatialSize;
                    outData += outSpatialSize;
                }
            }
        }
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown interpolation: " + interpolation);
}
#ifdef HAVE_CUDA
    // Builds the CUDA backend node: maps the interpolation string onto the
    // cuda4dnn enum and forwards the precomputed scale factors.
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        cuda4dnn::InterpolationType itype;
        if (interpolation == "nearest")
            itype = InterpolationType::NEAREST_NEIGHBOUR;
        else if (interpolation == "bilinear")
            itype = InterpolationType::BILINEAR;
        else
            CV_Error(Error::StsNotImplemented, "Requested interpolation mode is not available in resize layer.");
        return make_cuda_node<cuda4dnn::ResizeOp>(preferableTarget, std::move(context->stream), itype, scaleHeight, scaleWidth);
    }
#endif
    // Builds the Inference Engine (NN Builder) node: "Resample" for nearest
    // (single uniform factor only), "Interp" for bilinear.
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
#ifdef HAVE_INF_ENGINE
        InferenceEngine::Builder::Layer ieLayer(name);
        ieLayer.setName(name);
        if (interpolation == "nearest")
        {
            ieLayer.setType("Resample");
            ieLayer.getParameters()["type"] = std::string("caffe.ResampleParameter.NEAREST");
            ieLayer.getParameters()["antialias"] = false;
            // Resample takes one scalar factor, so anisotropic scaling is unsupported.
            if (scaleWidth != scaleHeight)
                CV_Error(Error::StsNotImplemented, "resample with sw != sh");
            // scaleWidth is input/output, IE expects output/input — hence the inverse.
            ieLayer.getParameters()["factor"] = 1.0f / scaleWidth;
        }
        else if (interpolation == "bilinear")
        {
            ieLayer.setType("Interp");
            ieLayer.getParameters()["pad_beg"] = 0;
            ieLayer.getParameters()["pad_end"] = 0;
            ieLayer.getParameters()["align_corners"] = false;
        }
        else
            CV_Error(Error::StsNotImplemented, "Unsupported interpolation: " + interpolation);
        ieLayer.getParameters()["width"] = outWidth;
        ieLayer.getParameters()["height"] = outHeight;
        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
        ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif  // HAVE_INF_ENGINE
        return Ptr<BackendNode>();
    }
#ifdef HAVE_DNN_NGRAPH
    // Builds the nGraph Interpolate node over the spatial axes {2, 3} with the
    // explicit target size (outHeight, outWidth).
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        ngraph::op::InterpolateAttrs attrs;
        attrs.pads_begin.push_back(0);
        attrs.pads_end.push_back(0);
        attrs.axes = ngraph::AxisSet{2, 3};
        attrs.align_corners = false;
        if (interpolation == "nearest") {
            attrs.mode = "nearest";
            attrs.antialias = false;
        } else if (interpolation == "bilinear") {
            attrs.mode = "linear";
        } else {
            CV_Error(Error::StsNotImplemented, "Unsupported interpolation: " + interpolation);
        }
        // Output spatial size passed as a constant second input.
        std::vector<int64_t> shape = {outHeight, outWidth};
        auto out_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, shape.data());
        auto interp = std::make_shared<ngraph::op::Interpolate>(ieInpNode, out_shape, attrs);
        return Ptr<BackendNode>(new InfEngineNgraphNode(interp));
    }
#endif  // HAVE_DNN_NGRAPH
protected:
    // Target spatial size; zero means "derive from zoom factors in finalize()".
    int outWidth, outHeight, zoomFactorWidth, zoomFactorHeight;
    String interpolation;          // "nearest" or "bilinear"
    float scaleWidth, scaleHeight; // input/output size ratios, computed in finalize()
    bool alignCorners;             // corner pixels of input and output coincide
};
Ptr<ResizeLayer> ResizeLayer::create(const LayerParams& params)
{
    // Factory entry point: hide the concrete implementation behind the public interface.
    Ptr<ResizeLayer> layer(new ResizeLayerImpl(params));
    return layer;
}
// Caffe-style Interp layer: bilinear resize whose zoom-factor based output size
// is 1 + factor*(size - 1) and whose scale is always computed align-corners style.
class InterpLayerImpl CV_FINAL : public ResizeLayerImpl
{
public:
    InterpLayerImpl(const LayerParams& params) : ResizeLayerImpl(params) {}

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert_N(inputs.size() == 1, inputs[0].size() == 4);
        outputs.resize(1, inputs[0]);
        // Caffe Interp formula: output = 1 + factor * (input - 1).
        outputs[0][2] = outHeight > 0 ? outHeight : (1 + zoomFactorHeight * (outputs[0][2] - 1));
        outputs[0][3] = outWidth > 0 ? outWidth : (1 + zoomFactorWidth * (outputs[0][3] - 1));
        // We can work in-place (do nothing) if input shape == output shape.
        return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
#ifdef HAVE_INF_ENGINE
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019
            || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
            return true;
#endif
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA;
    }

    virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        if (!outWidth && !outHeight)
        {
            outHeight = outputs[0].size[2];
            outWidth = outputs[0].size[3];
        }
        int inpHeight = inputs[0].size[2];
        int inpWidth = inputs[0].size[3];
        // Align-corners scale: (in-1)/(out-1); zero for degenerate 1-pixel outputs.
        scaleHeight = (outHeight > 1) ? (static_cast<float>(inpHeight - 1) / (outHeight - 1)) : 0.f;
        scaleWidth = (outWidth > 1) ? (static_cast<float>(inpWidth - 1) / (outWidth - 1)) : 0.f;
    }

#ifdef HAVE_INF_ENGINE
    // IE NN Builder node: always an "Interp" layer with an explicit output size.
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        InferenceEngine::Builder::Layer ieLayer(name);
        ieLayer.setName(name);
        ieLayer.setType("Interp");
        ieLayer.getParameters()["pad_beg"] = 0;
        ieLayer.getParameters()["pad_end"] = 0;
        ieLayer.getParameters()["width"] = outWidth;
        ieLayer.getParameters()["height"] = outHeight;
        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
        ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    // nGraph Interpolate node in "linear" mode over spatial axes {2, 3}.
    // NOTE(review): attrs.align_corners is left at its default here, unlike the
    // base class which sets it explicitly — confirm this matches the intent.
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        ngraph::op::InterpolateAttrs attrs;
        attrs.pads_begin.push_back(0);
        attrs.pads_end.push_back(0);
        attrs.axes = ngraph::AxisSet{2, 3};
        attrs.mode = "linear";
        std::vector<int64_t> shape = {outHeight, outWidth};
        auto out_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, shape.data());
        auto interp = std::make_shared<ngraph::op::Interpolate>(ieInpNode, out_shape, attrs);
        return Ptr<BackendNode>(new InfEngineNgraphNode(interp));
    }
#endif  // HAVE_DNN_NGRAPH
};
Ptr<Layer> InterpLayer::create(const LayerParams& params)
{
    // Interp is realized as a bilinear resize layer with Caffe-style output
    // size computation (see InterpLayerImpl::getMemoryShapes).
    LayerParams lp(params);
    lp.set("interpolation", "bilinear");
    Ptr<Layer> layer(new InterpLayerImpl(lp));
    return layer;
}
} // namespace dnn
} // namespace cv

View File

@@ -0,0 +1,325 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Implementation of Scale layer.
*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/scale_shift.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Scale layer: per-channel (or per-slice) affine transform out = in * w + b.
// Weights/bias either come from trainable blobs or, when blobs is empty,
// the weights are provided as a second runtime input.
class ScaleLayerImpl CV_FINAL : public ScaleLayer
{
public:
    ScaleLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        hasBias = params.get<bool>("bias_term", false);
        axis = params.get<int>("axis", 1);
        // hasWeights is resolved in finalize() once the blob count is known.
        hasWeights = false;
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        // Output shape always matches the first input.
        outputs.assign(1, inputs[0]);
        return true;
    }

    virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);
        // Two blobs => weights + bias; one blob => weights unless bias_term is set.
        hasWeights = blobs.size() == 2 || (blobs.size() == 1 && !hasBias);
        CV_Assert((inputs.size() == 2 && blobs.empty()) || blobs.size() == (int)hasWeights + (int)hasBias);
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               backendId == DNN_BACKEND_HALIDE ||
               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && axis == 1);
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
        // FP16 blobs go through the generic FP32 fallback.
        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }
        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        CV_Assert_N(outputs.size() == 1, !blobs.empty() || inputs.size() == 2);
        Mat &inpBlob = inputs[0];
        Mat &outBlob = outputs[0];
        // There is a mode when we multiply a first blob by a second one
        // instead of trainable weights.
        Mat weights = blobs.empty() ? inputs[1] : (hasWeights ? blobs[0] : Mat());
        Mat bias = hasBias ? blobs.back().reshape(1, 1) : Mat();
        if (!weights.empty())
            weights = weights.reshape(1, 1);
        MatShape inpShape = shape(inpBlob);
        const int numWeights = !weights.empty() ? weights.total() : bias.total();
        CV_Assert(numWeights != 0);
        if (hasWeights && hasBias)
            CV_CheckEQ(weights.total(), bias.total(), "Incompatible weights/bias blobs");
        // Find the contiguous axis range [axis, endAxis) whose total equals numWeights;
        // the weights broadcast over everything before and after that range.
        int endAxis;
        for (endAxis = axis + 1; endAxis <= inpBlob.dims; ++endAxis)
        {
            if (total(inpShape, axis, endAxis) == numWeights)
                break;
        }
        CV_Assert(total(inpShape, axis, endAxis) == numWeights);
        CV_Assert(!hasBias || numWeights == bias.total());
        CV_CheckTypeEQ(inpBlob.type(), CV_32FC1, ""); CV_CheckTypeEQ(outBlob.type(), CV_32FC1, "");
        int numSlices = total(inpShape, 0, axis);
        float* inpData = (float*)inpBlob.data;
        float* outData = (float*)outBlob.data;
        if (endAxis != inpBlob.dims)
        {
            // Trailing spatial dims remain: apply one (w, b) pair per inner slice
            // via convertTo (out = in * w + b).
            float* weightsData = !weights.empty() ? (float*)weights.data : 0;
            float* biasesData = hasBias ? (float*)bias.data : 0;
            int spatialSize = total(inpShape, endAxis); // spatialSize != 1
            for (int i = 0; i < numSlices; ++i)
            {
                for (int j = 0; j < numWeights; ++j)
                {
                    float w = weightsData ? weightsData[j] : 1;
                    float b = biasesData ? biasesData[j] : 0;
                    Mat inpSlice(1, spatialSize, CV_32F, inpData);
                    Mat outSlice(1, spatialSize, CV_32F, outData);
                    inpSlice.convertTo(outSlice, CV_32F, w, b);
                    inpData += spatialSize;
                    outData += spatialSize;
                }
            }
        }
        else
        {
            // Weights cover all trailing dims: element-wise multiply/add per slice.
            for (int i = 0; i < numSlices; ++i)
            {
                Mat inpSlice(1, numWeights, CV_32F, inpData);
                Mat outSlice(1, numWeights, CV_32F, outData);
                if (!weights.empty())
                {
                    multiply(inpSlice, weights, outSlice);
                    if (hasBias)
                        add(outSlice, bias, outSlice);
                }
                else if (hasBias)
                    add(inpSlice, bias, outSlice);
                inpData += numWeights;
                outData += numWeights;
            }
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        CV_Assert(!blobs.empty() || inputs.size() == 2);
        cv::Mat weightsMat = hasWeights ? blobs[0] : Mat();
        /* if the weights are provided, bias will be in blobs[1]; otherwise, it will be in blobs[0]
         * in either case, it is at the end of the blobs vector => bias = blobs.back()
         */
        cv::Mat biasMat = hasBias ? blobs.back() : Mat();
        return make_cuda_node<cuda4dnn::ScaleShiftOp>(preferableTarget, std::move(context->stream), axis, weightsMat, biasMat);
    }
#endif

    // Fuse this layer into a preceding Halide node when possible.
    virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
    {
        switch (node->backendId)
        {
            case DNN_BACKEND_HALIDE:
            {
#ifdef HAVE_HALIDE
                auto base = node.dynamicCast<HalideBackendNode>();
                Halide::Func& input = base->funcs.back();
                Halide::Var x("x"), y("y"), c("c"), n("n");
                Halide::Func top = attachHalide(input(x, y, c, n));
                return Ptr<BackendNode>(new HalideBackendNode(base, top));
#endif  // HAVE_HALIDE
                break;
            }
        }
        return Ptr<BackendNode>();
    }

    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        Halide::Buffer<float> input = halideBuffer(inputs[0]);
        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = attachHalide(input(x, y, c, n));
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif  // HAVE_HALIDE
        return Ptr<BackendNode>();
    }

#ifdef HAVE_HALIDE
    // attachHalide can work both with Halide::Buffer and Halide::Func. In the
    // second case it will be a fusion.
    Halide::Func attachHalide(const Halide::Expr& input)
    {
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        Halide::Var x("x"), y("y"), c("c"), n("n");
        const int numChannels = blobs[0].total();
        Halide::Expr topExpr = input;
        if (hasWeights)
        {
            auto weights = wrapToHalideBuffer(blobs[0], {numChannels});
            topExpr *= weights(c);
        }
        if (hasBias)
        {
            auto bias = wrapToHalideBuffer(blobs.back(), {numChannels});
            topExpr += bias(c);
        }
        top(x, y, c, n) = topExpr;
        return top;
    }
#endif  // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
    // IE ScaleShift node; when weights are absent they are filled with ones so
    // only the bias takes effect.
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        InferenceEngine::Builder::Layer l = InferenceEngine::Builder::ScaleShiftLayer(name);
        CV_Assert(!blobs.empty());
        const size_t numChannels = blobs[0].total();
        if (hasWeights)
        {
            addConstantData("weights", wrapToInfEngineBlob(blobs[0], {numChannels}, InferenceEngine::Layout::C), l);
        }
        else
        {
            auto weights = InferenceEngine::make_shared_blob<float>({
                               InferenceEngine::Precision::FP32, {(size_t)numChannels},
                               InferenceEngine::Layout::C
                           });
            weights->allocate();
            float* buf = weights->buffer().as<float*>();
            std::fill(buf, buf + numChannels, 1);
            addConstantData("weights", weights, l);
        }
        if (hasBias)
            addConstantData("biases", wrapToInfEngineBlob(blobs.back(), {numChannels}, InferenceEngine::Layout::C), l);
        return Ptr<BackendNode>(new InfEngineBackendNode(l));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    // nGraph: Multiply + Add with NUMPY broadcasting; missing weights/bias are
    // replaced by constant ones/zeros.
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        CV_Assert(!blobs.empty());
        const size_t numChannels = blobs[0].total();
        auto ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        std::vector<size_t> shape(ieInpNode->get_shape().size(), 1);
        shape[1] = numChannels;
        auto weight = hasWeights ?
                      std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
                                                             ngraph::Shape(shape), blobs[0].data) :
                      std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
                                                             ngraph::Shape(shape), std::vector<float>(numChannels, 1).data());
        auto bias = hasBias ?
                    std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
                                                           ngraph::Shape(shape), blobs.back().data) :
                    std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
                                                           ngraph::Shape(shape), std::vector<float>(numChannels, 0).data());
        auto scale_node = std::make_shared<ngraph::op::v1::Multiply>(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY);
        auto scale_shift = std::make_shared<ngraph::op::v1::Add>(scale_node, bias, ngraph::op::AutoBroadcastType::NUMPY);
        return Ptr<BackendNode>(new InfEngineNgraphNode(scale_shift));
    }
#endif  // HAVE_DNN_NGRAPH

    // Expose scale/shift blobs so the layer can be fused into convolutions.
    void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE
    {
        scale = hasWeights ? blobs[0] : Mat();
        shift = hasBias ? blobs.back() : Mat();
    }

    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
        CV_UNUSED(outputs); // suppress unused variable warning
        long flops = 0;
        // One multiply + one add per element.
        for(int i = 0; i < inputs.size(); i++)
        {
            flops += 2*total(inputs[i]);
        }
        return flops;
    }

private:
    bool hasWeights;  // true when blobs[0] holds multiplicative weights (set in finalize)
};
Ptr<ScaleLayer> ScaleLayer::create(const LayerParams& params)
{
    // Factory entry point for the public ScaleLayer interface.
    Ptr<ScaleLayer> layer(new ScaleLayerImpl(params));
    return layer;
}
Ptr<Layer> ShiftLayer::create(const LayerParams& params)
{
    // A Shift layer is a Scale layer with only a bias term, applied from axis 0.
    LayerParams lp;
    lp.name = params.name;
    lp.type = "Scale";
    lp.blobs = params.blobs;
    lp.set("axis", 0);
    lp.set("bias_term", true);
    Ptr<ScaleLayer> layer(new ScaleLayerImpl(lp));
    return layer;
}
} // namespace dnn
} // namespace cv

View File

@@ -0,0 +1,161 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "../op_cuda.hpp"
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/shuffle_channel.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv { namespace dnn {
// ShuffleChannel: reorders channels by viewing C as (group, C/group), swapping
// the two factors, and flattening back. Implemented via an internal Permute
// layer over a reshaped 4D view.
class ShuffleChannelLayerImpl CV_FINAL : public ShuffleChannelLayer
{
public:
    ShuffleChannelLayerImpl(const LayerParams& params)
    {
        group = params.get<int>("group", 1);
        setParamsFrom(params);
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA;
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == 1 && inputs[0].size() == 4);
        // The channel count must split evenly into the groups.
        CV_Assert(inputs[0][1] % group == 0);
        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
        // group == 1 is an identity transform, so it may run in-place.
        return group == 1;
    }

    virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        if (group != 1)
        {
            std::vector<Mat> inputs, outputs;
            inputs_arr.getMatVector(inputs);
            outputs_arr.getMatVector(outputs);
            // Build a Permute layer that swaps axes 1 and 2 of the
            // (N, group, C/group, H*W) view of the input.
            LayerParams lp;
            float order[] = {0, 2, 1, 3};
            lp.set("order", DictValue::arrayInt(&order[0], 4));
            permute = PermuteLayer::create(lp);
            const Mat& inp = inputs[0];
            const Mat& out = outputs[0];
            permuteInpShape.resize(4);
            permuteInpShape[0] = inp.size[0];
            permuteInpShape[1] = group;
            permuteInpShape[2] = inp.size[1] / group;
            permuteInpShape[3] = inp.size[2]*inp.size[3]; // H and W flattened together
            permuteOutShape.resize(4);
            permuteOutShape[0] = permuteInpShape[0];
            permuteOutShape[1] = permuteInpShape[2];
            permuteOutShape[2] = permuteInpShape[1];
            permuteOutShape[3] = permuteInpShape[3];
            std::vector<Mat> permuteInputs(1, inp.reshape(1, permuteInpShape));
            std::vector<Mat> permuteOutputs(1, out.reshape(1, permuteOutShape));
            permute->finalize(permuteInputs, permuteOutputs);
        }
    }

#ifdef HAVE_OPENCL
    // OpenCL path: same reshape + permute scheme on UMats; plain copy when group == 1.
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);
        if (inputs[0].u != outputs[0].u)
        {
            if (!permute.empty())
            {
                inputs[0] = inputs[0].reshape(1, permuteInpShape.size(), &permuteInpShape[0]);
                outputs[0] = outputs[0].reshape(1, permuteOutShape.size(), &permuteOutShape[0]);
                permute->preferableTarget = preferableTarget;
                permute->forward(inputs, outputs, internals);
            }
            else
                inputs[0].copyTo(outputs[0]);
        }
        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))
        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }
        std::vector<Mat> inputs, outputs, internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);
        Mat inp = inputs[0];
        Mat out = outputs[0];
        // In-place (same buffer) means group == 1: nothing to do.
        if (inp.data != out.data)
        {
            if (!permute.empty())
            {
                inp = inp.reshape(1, permuteInpShape);
                out = out.reshape(1, permuteOutShape);
                std::vector<Mat> permuteInputs(1, inp);
                std::vector<Mat> permuteOutputs(1, out);
                permute->forward(permuteInputs, permuteOutputs, internals);
            }
            else
                inp.copyTo(out);
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        return make_cuda_node<cuda4dnn::ShuffleChannelOp>(preferableTarget, std::move(context->stream), group);
    }
#endif

private:
    Ptr<PermuteLayer> permute;                        // axis-swap worker, built in finalize() when group > 1
    std::vector<int> permuteInpShape, permuteOutShape; // 4D views fed to `permute`
};
Ptr<Layer> ShuffleChannelLayer::create(const LayerParams& params)
{
    // Factory entry point for the public ShuffleChannel interface.
    Ptr<Layer> layer(new ShuffleChannelLayerImpl(params));
    return layer;
}
} // namespace dnn
} // namespace cv

View File

@@ -0,0 +1,473 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include "layers_common.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/slice.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class SliceLayerImpl : public SliceLayer
{
public:
SliceLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
axis = params.get<int>("axis", 1);
num_split = params.get<int>("num_split", 0);
if (params.has("slice_point"))
{
CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end"));
const DictValue &indicesValue = params.get("slice_point");
sliceRanges.resize(indicesValue.size() + 1,
std::vector<Range>(axis + 1, Range::all()));
int prevSlice = 0;
for (int i = 0; i < indicesValue.size(); ++i)
{
sliceRanges[i][axis].start = prevSlice;
sliceRanges[i][axis].end = indicesValue.get<int>(i);
prevSlice = sliceRanges[i][axis].end;
}
sliceRanges.back()[axis].start = prevSlice;
}
else if (params.has("begin"))
{
CV_Assert(params.has("size") ^ params.has("end"));
const DictValue &begins = params.get("begin");
const DictValue &sizesOrEnds = params.has("size") ? params.get("size") : params.get("end");
CV_Assert(begins.size() == sizesOrEnds.size());
sliceRanges.resize(1);
sliceRanges[0].resize(begins.size(), Range::all());
for (int i = 0; i < begins.size(); ++i)
{
int start = begins.get<int>(i);
int sizeOrEnd = sizesOrEnds.get<int>(i); // It may be negative to reverse indexation.
CV_Assert(start >= 0);
sliceRanges[0][i].start = start;
if (params.has("size"))
{
int size = sizeOrEnd;
CV_Assert(size == -1 || size > 0); // -1 value means range [start, axis_size).
sliceRanges[0][i].end = size > 0 ? (start + size) : -1; // We'll finalize a negative value later.
}
else
{
int end = sizeOrEnd;
CV_Assert(end < 0 || end > start); // End index is excluded.
sliceRanges[0][i].end = end; // We'll finalize a negative value later.
}
}
}
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) &&
#ifdef HAVE_INF_ENGINE
INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) &&
#endif
sliceRanges.size() == 1 && sliceRanges[0].size() == 4);
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1);
MatShape inpShape = inputs[0];
if (!sliceRanges.empty())
{
outputs.resize(sliceRanges.size(), inpShape);
for (int i = 0; i < outputs.size(); ++i)
{
CV_Assert(sliceRanges[i].size() <= inpShape.size());
for (int j = 0; j < sliceRanges[i].size(); ++j)
{
outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();
}
}
}
else // Divide input blob on equal parts by axis.
{
CV_Assert(0 <= axis && axis < inpShape.size());
int splits = num_split ? num_split : requiredOutputs;
CV_Assert(splits > 0 && inpShape[axis] % splits == 0);
inpShape[axis] /= splits;
outputs.resize(splits, inpShape);
}
return false;
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
CV_Assert(inputs.size() == 1);
const MatSize& inpShape = inputs[0].size;
if (sliceRanges.empty())
{
// Divide input blob on equal parts by axis.
int outAxisSize = inpShape[axis] / outputs.size();
sliceRanges.resize(outputs.size(),
std::vector<Range>(axis + 1, Range::all()));
int prevSlice = 0;
for (int i = 0; i < outputs.size(); ++i)
{
sliceRanges[i][axis].start = prevSlice;
sliceRanges[i][axis].end = sliceRanges[i][axis].start + outAxisSize;
prevSlice = sliceRanges[i][axis].end;
}
}
else
CV_Assert(outputs.size() == sliceRanges.size());
for (int i = 0; i < outputs.size(); ++i)
{
CV_Assert(sliceRanges[i].size() <= inpShape.dims());
// Fill the rest of ranges.
for (int j = sliceRanges[i].size(); j < inpShape.dims(); ++j)
{
sliceRanges[i].push_back(Range::all());
}
// Clamp.
for (int j = 0; j < sliceRanges[i].size(); ++j)
{
sliceRanges[i][j] = clamp(sliceRanges[i][j], inpShape[j]);
}
}
}
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inputs_.depth() == CV_16S);
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);
if (inputs[0].dims < 4 || (total(shape(outputs[0]), 0, 2) % 4 != 0) ||
(total(shape(outputs[0]), 2) % 4 != 0))
return false;
String opts;
if (use_half)
opts = "-DDtype=half -DDtype4=half4 -DDtype8=half8";
else
opts = "-DDtype=float -DDtype4=float4 -DDtype8=float8";
const UMat& inpMat = inputs[0];
for (size_t i = 0; i < outputs.size(); i++)
{
int groups = outputs[i].size[0];
int channels = outputs[i].size[1];
int rows = outputs[i].size[2];
int cols = outputs[i].size[3];
ocl::Kernel kernel("slice", ocl::dnn::slice_oclsrc, opts);
size_t local[] = { 128 };
size_t global[] = { (size_t)groups * channels / 4 * local[0] };
int idx = 0;
kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inpMat));
kernel.set(idx++, (int)(inpMat.size[2] * inpMat.size[3]));
kernel.set(idx++, (int)(rows * cols));
kernel.set(idx++, (int)inpMat.size[3]);
kernel.set(idx++, (int)cols);
kernel.set(idx++, (int)sliceRanges[i][2].start);
kernel.set(idx++, (int)sliceRanges[i][3].start);
kernel.set(idx++, ocl::KernelArg::PtrWriteOnly(outputs[i]));
bool ret = kernel.run(1, global, local, false);
if (!ret)
return false;
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
const Mat& inpMat = inputs[0];
CV_Assert(outputs.size() == sliceRanges.size());
for (size_t i = 0; i < outputs.size(); i++)
{
inpMat(sliceRanges[i]).copyTo(outputs[i]);
}
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
std::vector<std::vector<std::size_t>> offsets;
for (const auto& ranges : sliceRanges)
{
std::vector<std::size_t> offsets_i;
for (const auto& range : ranges)
offsets_i.push_back(range.start);
offsets.push_back(std::move(offsets_i));
}
return make_cuda_node<cuda4dnn::SliceOp>(preferableTarget, std::move(context->stream), std::move(offsets));
}
#endif
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
    // Map this slice onto an Inference Engine "Crop" layer (NN Builder API).
    // Only a single output slice is supported here, and at most two inputs
    // (data plus an optional reference blob that provides the target shape).
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        CV_Assert_N(sliceRanges.size() == 1, inputs.size() <= 2);
        std::vector<size_t> axes, offsets, dims;
        int from, to, step;
        int numDims = sliceRanges[0].size();
        // The order in which axes are listed differs by target: MYRIAD walks
        // them in ascending order from `axis`, other targets in descending
        // order down to `axis`.  NOTE(review): presumably a plugin-specific
        // requirement of the Crop primitive — confirm against the IE plugins.
        if (preferableTarget == DNN_TARGET_MYRIAD)
        {
            from = axis;
            to = numDims;
            step = 1;
        }
        else
        {
            from = numDims - 1;
            to = axis - 1;
            step = -1;
        }
        // For every sliced axis record its index, the crop start offset and
        // the cropped extent (Range::size() == end - start).
        for (int i = from; i != to; i += step)
        {
            axes.push_back(i);
            offsets.push_back(sliceRanges[0][i].start);
            dims.push_back(sliceRanges[0][i].size());
        }
        InferenceEngine::Builder::Layer ieLayer(name);
        ieLayer.setName(name);
        ieLayer.setType("Crop");
        ieLayer.getParameters()["axis"] = axes;
        ieLayer.getParameters()["dim"] = dims;
        ieLayer.getParameters()["offset"] = offsets;
        // Crop always declares two input ports; the second carries the shape
        // reference even when the network provides only one real input.
        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(2));
        ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
        if (inputs.size() != 2)
        {
            // No reference blob was connected: synthesize a constant blob of
            // the output shape so the second Crop port is still satisfied.
            std::vector<size_t> outShape(numDims);
            for (int i = 0; i < numDims; ++i)
                outShape[i] = sliceRanges[0][i].size();

            ieLayer.getInputPorts()[1].setParameter("type", "weights");

            auto shapeSource = InferenceEngine::make_shared_blob<float>({
                                    InferenceEngine::Precision::FP32, outShape,
                                    InferenceEngine::Layout::ANY
                               });
            shapeSource->allocate();
            addConstantData("weights", shapeSource, ieLayer);
        }
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif
#endif
#ifdef HAVE_DNN_NGRAPH
    // Express the slice as an nGraph StridedSlice with unit strides:
    // lower bounds come from Range::start, upper bounds from Range::end.
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        CV_Assert_N(nodes.size() <= 2);
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        CV_Assert(sliceRanges[0].size() == ieInpNode->get_shape().size());

        const size_t numAxes = sliceRanges[0].size();
        std::vector<int64_t> begins, ends;
        begins.reserve(numAxes);
        ends.reserve(numAxes);
        for (size_t i = 0; i < numAxes; ++i)
        {
            begins.push_back(sliceRanges[0][i].start);
            ends.push_back(sliceRanges[0][i].end);
        }

        auto lower_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
                            ngraph::Shape{begins.size()}, begins.data());
        auto upper_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
                            ngraph::Shape{ends.size()}, ends.data());
        auto strides = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
                       ngraph::Shape{ends.size()}, std::vector<int64_t>(numAxes, 1));

        // Empty begin/end masks: every bound is taken literally.
        auto slice = std::make_shared<ngraph::op::v1::StridedSlice>(ieInpNode,
                     lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});

        return Ptr<BackendNode>(new InfEngineNgraphNode(slice));
    }
#endif  // HAVE_DNN_NGRAPH
};
// Caffe-style Crop layer implemented on top of Slice: the first input is
// cropped so that, from `axis` onwards, its dimensions match those of the
// second (reference) input. Optional per-axis offsets choose where the
// crop window starts inside the data blob.
class CropLayerImpl CV_FINAL : public SliceLayerImpl
{
public:
    CropLayerImpl(const LayerParams& params) : SliceLayerImpl(LayerParams())
    {
        setParamsFrom(params);
        axis = params.get<int>("axis", 2);

        // "offset" may hold one value (applied to all cropped axes) or one
        // value per cropped axis; validation happens in finalize().
        const DictValue *paramOffset = params.ptr("offset");
        if (paramOffset)
        {
            const int numOffsets = paramOffset->size();
            for (int i = 0; i < numOffsets; i++)
                offset.push_back(paramOffset->get<int>(i));
        }
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == 2);

        // Keep the leading dims of the data input; copy the trailing dims
        // (axis and beyond) from the reference input.
        MatShape outShape = inputs[0];
        const int firstAxis = clamp(axis, outShape);
        for (int d = firstAxis; d < outShape.size(); d++)
            outShape[d] = inputs[1][d];

        outputs.resize(1, outShape);
        return false;
    }

    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);
        CV_Assert(2 == inputs.size());

        const Mat &srcBlob = inputs[0];
        const Mat &refBlob = inputs[1];
        const int numDims = srcBlob.dims;
        const int startAxis = clamp(axis, numDims);

        // Expand the user-supplied offsets to one start coordinate per axis.
        std::vector<int> startCoords(numDims, 0);
        if (offset.size() == 1)
        {
            // A single offset applies uniformly to every cropped axis.
            for (int d = startAxis; d < numDims; d++)
                startCoords[d] = offset[0];
        }
        else if (offset.size() > 1)
        {
            if ((int)offset.size() != numDims - startAxis)
                CV_Error(Error::StsBadArg, "number of offset values specified must be "
                                           "equal to the number of dimensions following axis.");

            for (int d = startAxis; d < numDims; d++)
                startCoords[d] = offset[d - startAxis];
        }

        // Translate the crop into per-axis ranges consumed by SliceLayerImpl.
        sliceRanges.resize(1);
        sliceRanges[0].resize(numDims);
        for (int d = 0; d < numDims; d++)
        {
            if (d < startAxis)
            {
                // Leading axes are kept whole.
                sliceRanges[0][d] = Range(0, srcBlob.size[d]);
                continue;
            }
            // Cropped axes take the reference blob's extent, shifted by the offset.
            if (startCoords[d] < 0 || startCoords[d] + refBlob.size[d] > srcBlob.size[d])
                CV_Error(Error::StsBadArg, "invalid crop parameters or blob sizes");
            sliceRanges[0][d] = Range(startCoords[d], startCoords[d] + refBlob.size[d]);
        }
    }

private:
    std::vector<int> offset;  // raw "offset" values from LayerParams
};
// Factory: build the default Slice layer implementation.
Ptr<SliceLayer> SliceLayer::create(const LayerParams& params)
{
    Ptr<SliceLayer> layer(new SliceLayerImpl(params));
    return layer;
}
// Factory: build the Crop layer (a Slice specialization driven by a reference input).
Ptr<Layer> CropLayer::create(const LayerParams& params)
{
    Ptr<Layer> layer(new CropLayerImpl(params));
    return layer;
}
}
}

View File

@@ -0,0 +1,396 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include "../op_vkcom.hpp"
#include <algorithm>
#include <stdlib.h>
using std::max;
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
using namespace cv::dnn::ocl4dnn;
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/softmax.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Softmax (optionally log-softmax) over a chosen axis.
// The input is viewed as [outer, channels, inner] around `axisRaw`; the
// normalization runs over `channels` independently for each (outer, inner)
// position. Fix over the previous revision: per-slice element offsets in the
// CPU path were computed as `int` from size_t operands, which narrows and can
// overflow for tensors with more than INT_MAX elements — they are size_t now.
class SoftMaxLayerImpl CV_FINAL : public SoftmaxLayer
{
public:

    SoftMaxLayerImpl(const LayerParams& params)
    {
        axisRaw = params.get<int>("axis", 1);             // axis to normalize over (default: channels)
        logSoftMax = params.get<bool>("log_softmax", false);
        setParamsFrom(params);
    }

#ifdef HAVE_OPENCL
    Ptr<OCL4DNNSoftmax<float> > softmaxOp;  // cached OpenCL primitive, built lazily in forward_ocl
#endif

    // Output shape equals the input shape; one internal buffer with the
    // softmax axis collapsed to 1 holds the per-slice max/sum values.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        bool inplace = Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
        MatShape shape = inputs[0];
        int cAxis = clamp(axisRaw, shape.size());
        shape[cAxis] = 1;
        internals.assign(1, shape);
        return inplace;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        // Halide only supports the canonical channel axis; the IE backends
        // have no log-softmax primitive here.
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1) ||
               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && !logSoftMax) ||
               (backendId == DNN_BACKEND_VKCOM && haveVulkan());
    }

#ifdef HAVE_OPENCL
    virtual void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs) CV_OVERRIDE
    {
        // Shapes may have changed; force the primitive to be rebuilt.
        softmaxOp.release();
    }

    // OpenCL path: try the ocl4dnn primitive first, then fall back to the
    // generic four-kernel pipeline (max, subtract, sum, divide/log).
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        bool use_half = (inputs_.depth() == CV_16S);
        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);
        internals_.getUMatVector(internals);

        UMat& src = inputs[0];
        UMat& dstMat = outputs[0];
        int axis = clamp(axisRaw, src.dims);

        if (softmaxOp.empty())
        {
            OCL4DNNSoftmaxConfig config;

            config.in_shape = shape(inputs[0]);
            config.axis = axis;
            config.channels = inputs[0].size[axis];
            config.logsoftmax = logSoftMax;
            config.use_half = use_half;

            softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
        }

        if (softmaxOp->Forward(src, dstMat))
            return true;

        UMat& bufMat = internals[0];
        MatShape s = shape(src);
        size_t outerSize = total(s, 0, axis);
        size_t channels = src.size[axis];
        size_t innerSize = total(s, axis + 1);

        String buildOpts = format("-DT=%s", use_half ? "half" : "float");
        ocl::Kernel kmax, ksub, ksum, kdiv;

        if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
            return false;

        if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
            return false;

        if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
            return false;

        // Only the final kernel needs the LOG_SOFTMAX specialization.
        if (logSoftMax) buildOpts += " -DLOG_SOFTMAX ";
        if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
            return false;

        size_t bufSize = internals[0].total();
        size_t totalSize = src.total();

        size_t internal_globalSize[1] = { bufSize };
        size_t total_globalSize[1] = { totalSize };

        kmax.args((int)outerSize, (int)channels, (int)innerSize,
                  ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrReadWrite(bufMat));
        if (!kmax.run(1, internal_globalSize, NULL, false))
            return false;

        ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
                  ocl::KernelArg::PtrReadOnly(bufMat),
                  ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(dstMat));
        if (!ksub.run(1, total_globalSize, NULL, false))
            return false;

        ksum.args((int)outerSize, (int)channels, (int)innerSize,
                  ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
        if (!ksum.run(1, internal_globalSize, NULL, false))
            return false;

        kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
                  ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
        if (!kdiv.run(1, total_globalSize, NULL, false))
            return false;
        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        // FP16 tensors are handled by converting through the generic fallback.
        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs, internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        const Mat &src = inputs[0];
        Mat &dst = outputs[0];

        int axis = clamp(axisRaw, src.dims);
        size_t outerSize = src.total(0, axis), channels = src.size[axis],
               innerSize = src.total(axis + 1);

        CV_Assert(src.type() == CV_32F);
        CV_Assert(src.isContinuous() && dst.isContinuous());

        const float *srcPtr = src.ptr<float>();
        float *dstPtr = dst.ptr<float>();
        float *bufPtr = internals[0].ptr<float>();

        size_t outerStep = src.total(axis);       // elements per outer slice
        size_t cnStep = src.total(axis + 1);      // elements per channel within a slice

        // Compute max along axis (numerical stability: softmax is shift-invariant).
        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
        {
            size_t srcOffset = outerDim * outerStep;
            size_t bufOffset = outerDim * cnStep;

            memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));

            for (size_t cnDim = 1; cnDim < channels; cnDim++)
            {
                for (size_t i = 0; i < innerSize; i++)
                    bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
            }
        }

        // Subtract the per-slice max.
        // NOTE: offsets are size_t — previously narrowed to int, overflowing
        // for tensors with more than INT_MAX elements.
        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
        {
            size_t srcOffset = outerDim * outerStep;
            size_t bufOffset = outerDim * cnStep;

            for (size_t cnDim = 0; cnDim < channels; cnDim++)
            {
                const size_t offset = srcOffset + cnDim * cnStep;
                for (size_t i = 0; i < innerSize; i++)
                    dstPtr[offset + i] = srcPtr[offset + i] - bufPtr[bufOffset + i];
            }
        }

        cv::exp(dst, dst);

        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
        {
            size_t srcOffset = outerDim * outerStep;
            size_t bufOffset = outerDim * cnStep;

            // Sum exp along axis.
            for (size_t i = 0; i < innerSize; i++)
                bufPtr[bufOffset + i] = 0.f;

            for (size_t cnDim = 0; cnDim < channels; cnDim++)
            {
                const size_t offset = srcOffset + cnDim * cnStep;
                for (size_t i = 0; i < innerSize; i++)
                    bufPtr[bufOffset + i] += dstPtr[offset + i];
            }

            // Divide by the computed sum.
            for (size_t cnDim = 0; cnDim < channels; cnDim++)
            {
                const size_t offset = srcOffset + cnDim * cnStep;
                for (size_t i = 0; i < innerSize; i++)
                    dstPtr[offset + i] /= bufPtr[bufOffset + i];
            }

            if (logSoftMax)
            {
                for (size_t cnDim = 0; cnDim < channels; cnDim++)
                {
                    const size_t offset = srcOffset + cnDim * cnStep;
                    for (size_t i = 0; i < innerSize; i++)
                        dstPtr[offset + i] = log(dstPtr[offset + i]);
                }
            }
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
        auto channel_axis = clamp(axisRaw, input_wrapper->getRank());
        return make_cuda_node<cuda4dnn::SoftmaxOp>(preferableTarget, std::move(context->cudnn_handle), channel_axis, logSoftMax);
    }
#endif

    virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
    {
#ifdef HAVE_VULKAN
        vkcom::Tensor in = VkComTensor(inputs[0]);
        int cAxis = clamp(axisRaw, in.dimNum());
        std::shared_ptr<vkcom::OpBase> op(new vkcom::OpSoftmax(cAxis, logSoftMax));
        return Ptr<BackendNode>(new VkComBackendNode(inputs, op));
#endif  // HAVE_VULKAN
        return Ptr<BackendNode>();
    }

    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        // Only 1x1 spatial inputs are supported (i.e. fully-connected heads).
        Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
        int inW, inH, inC, inN;
        getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);

        if (inW != 1 || inH != 1)
            CV_Error(cv::Error::StsNotImplemented,
                     "Halide backend for SoftMax with spatial size "
                     "more than 1x1 is not implemented");

        // top(x, y, c, n) = exp(input(x, y, c, n)) / sum(exp(input(x, y, c, n)))
        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        Halide::Func expInput("expInput");
        Halide::RDom r(0, inW, 0, inH, 0, inC);
        expInput(x, y, c, n) = exp(inputBuffer(x, y, c, n));
        Halide::Expr globalSum = sum(expInput(r.x, r.y, r.z, n));
        top(x, y, c, n) = expInput(x, y, c, n) / globalSum;
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif  // HAVE_HALIDE
        return Ptr<BackendNode>();
    }

#ifdef HAVE_INF_ENGINE
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);

        InferenceEngine::Builder::SoftMaxLayer ieLayer(name);
        ieLayer.setAxis(clamp(axisRaw, input->getDims().size()));

        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_INF_ENGINE

#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        int axis = clamp(axisRaw, ieInpNode->get_shape().size());
        auto softmax = std::make_shared<ngraph::op::v1::Softmax>(ieInpNode, axis);
        return Ptr<BackendNode>(new InfEngineNgraphNode(softmax));
    }
#endif  // HAVE_DNN_NGRAPH

    // Rough cost model: max, subtract, exp+sum, divide — four passes per element.
    int64 getFLOPS(const std::vector<MatShape> &inputs,
                   const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
        CV_UNUSED(outputs); // suppress unused variable warning
        int64 flops = 0;

        for (size_t i = 0; i < inputs.size(); i++)
        {
            flops += 4*total(inputs[i]);
        }

        return flops;
    }

    int axisRaw;  // raw (possibly negative) axis from LayerParams
};
// Factory: build the default Softmax layer implementation.
Ptr<SoftmaxLayer> SoftmaxLayer::create(const LayerParams& params)
{
    Ptr<SoftmaxLayer> layer(new SoftMaxLayerImpl(params));
    return layer;
}
}
}

View File

@@ -0,0 +1,128 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "../op_cuda.hpp"
#include "layers_common.hpp"
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/split.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
// Split layer: duplicates its single input into every connected output blob.
class SplitLayerImpl CV_FINAL : public SplitLayer
{
public:
    SplitLayerImpl(const LayerParams &params)
    {
        setParamsFrom(params);

        // "top_count" pins the number of outputs explicitly; without it the
        // count is derived later from the number of requested outputs.
        //TODO: maybe "top_count" param is useless because it can be determined by output connections number
        outputsCount = -1;
        if (params.has("top_count"))
        {
            outputsCount = params.get<int>("top_count");
            CV_Assert(outputsCount >= 0);
        }
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA;
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == 1);

        // Every output mirrors the input shape; at least one output is produced.
        const int numOutputs = (outputsCount >= 0) ? outputsCount : requiredOutputs;
        Layer::getMemoryShapes(inputs, std::max(1, numOutputs), outputs, internals);
        return false;
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        // Copy the single input into every output.
        const Mat& src = inputs[0];
        for (Mat& dst : outputs)
        {
            CV_Assert(src.total() == dst.total());
            src.copyTo(dst);
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        return make_cuda_node<cuda4dnn::SplitOp>(preferableTarget, std::move(context->stream));
    }
#endif
};
// Factory: build the default Split layer implementation.
Ptr<SplitLayer> SplitLayer::create(const LayerParams& params)
{
    Ptr<SplitLayer> layer(new SplitLayerImpl(params));
    return layer;
}
}
}