add part of opencv

This commit is contained in:
Tang1705
2020-01-27 20:20:56 +08:00
parent 0c4ac1d8bb
commit a71fa47620
6518 changed files with 3122580 additions and 0 deletions

View File

@@ -0,0 +1,881 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
//#include <math.h>
#include "precomp.hpp"
#include "opencl_kernels_video.hpp"
namespace cv
{
/*!
The class implements the following algorithm:
"Efficient Adaptive Density Estimation per Image Pixel for the Task of Background Subtraction"
Z.Zivkovic, F. van der Heijden
Pattern Recognition Letters, vol. 27, no. 7, pages 773-780, 2006
http://www.zoranz.net/Publications/zivkovicPRL2006.pdf
*/
// default parameters of gaussian background detection algorithm
static const int defaultHistory2 = 500; // Learning rate; alpha = 1/defaultHistory2
static const int defaultNsamples = 7; // number of samples saved in memory
static const float defaultDist2Threshold = 20.0f*20.0f;//threshold on distance from the sample
// additional parameters
static const unsigned char defaultnShadowDetection2 = (unsigned char)127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
static const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation
class BackgroundSubtractorKNNImpl CV_FINAL : public BackgroundSubtractorKNN
{
public:
//! the default constructor
BackgroundSubtractorKNNImpl()
{
frameSize = Size(0,0);
frameType = 0;
nframes = 0;
history = defaultHistory2;
//set parameters
// N - the number of samples stored in memory per model
nN = defaultNsamples;
//kNN - k nearest neighbour - number on NN for detecting background - default K=[0.1*nN]
nkNN=MAX(1,cvRound(0.1*nN*3+0.40));
//Tb - Threshold Tb*kernelwidth
fTb = defaultDist2Threshold;
// Shadow detection
bShadowDetection = 1;//turn on
nShadowDetection = defaultnShadowDetection2;
fTau = defaultfTau;// Tau - shadow threshold
name_ = "BackgroundSubtractor.KNN";
nLongCounter = 0;
nMidCounter = 0;
nShortCounter = 0;
#ifdef HAVE_OPENCL
opencl_ON = true;
#endif
}
//! the full constructor that takes the length of the history,
// the number of gaussian mixtures, the background ratio parameter and the noise strength
BackgroundSubtractorKNNImpl(int _history, float _dist2Threshold, bool _bShadowDetection=true)
{
frameSize = Size(0,0);
frameType = 0;
nframes = 0;
history = _history > 0 ? _history : defaultHistory2;
//set parameters
// N - the number of samples stored in memory per model
nN = defaultNsamples;
//kNN - k nearest neighbour - number on NN for detcting background - default K=[0.1*nN]
nkNN=MAX(1,cvRound(0.1*nN*3+0.40));
//Tb - Threshold Tb*kernelwidth
fTb = _dist2Threshold>0? _dist2Threshold : defaultDist2Threshold;
bShadowDetection = _bShadowDetection;
nShadowDetection = defaultnShadowDetection2;
fTau = defaultfTau;
name_ = "BackgroundSubtractor.KNN";
nLongCounter = 0;
nMidCounter = 0;
nShortCounter = 0;
#ifdef HAVE_OPENCL
opencl_ON = true;
#endif
}
//! the destructor
~BackgroundSubtractorKNNImpl() CV_OVERRIDE {}
//! the update operator
void apply(InputArray image, OutputArray fgmask, double learningRate) CV_OVERRIDE;
//! computes a background image which are the mean of all background gaussians
virtual void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE;
//! re-initialization method
void initialize(Size _frameSize, int _frameType)
{
frameSize = _frameSize;
frameType = _frameType;
nframes = 0;
int nchannels = CV_MAT_CN(frameType);
CV_Assert( nchannels <= CV_CN_MAX );
// Reserve memory for the model
int size=frameSize.height*frameSize.width;
//Reset counters
nShortCounter = 0;
nMidCounter = 0;
nLongCounter = 0;
#ifdef HAVE_OPENCL
if (ocl::isOpenCLActivated() && opencl_ON)
{
create_ocl_apply_kernel();
kernel_getBg.create("getBackgroundImage2_kernel", ocl::video::bgfg_knn_oclsrc, format( "-D CN=%d -D NSAMPLES=%d", nchannels, nN));
if (kernel_apply.empty() || kernel_getBg.empty())
opencl_ON = false;
}
else opencl_ON = false;
if (opencl_ON)
{
u_flag.create(frameSize.height * nN * 3, frameSize.width, CV_8UC1);
u_flag.setTo(Scalar::all(0));
if (nchannels==3)
nchannels=4;
u_sample.create(frameSize.height * nN * 3, frameSize.width, CV_32FC(nchannels));
u_sample.setTo(Scalar::all(0));
u_aModelIndexShort.create(frameSize.height, frameSize.width, CV_8UC1);
u_aModelIndexShort.setTo(Scalar::all(0));
u_aModelIndexMid.create(frameSize.height, frameSize.width, CV_8UC1);
u_aModelIndexMid.setTo(Scalar::all(0));
u_aModelIndexLong.create(frameSize.height, frameSize.width, CV_8UC1);
u_aModelIndexLong.setTo(Scalar::all(0));
u_nNextShortUpdate.create(frameSize.height, frameSize.width, CV_8UC1);
u_nNextShortUpdate.setTo(Scalar::all(0));
u_nNextMidUpdate.create(frameSize.height, frameSize.width, CV_8UC1);
u_nNextMidUpdate.setTo(Scalar::all(0));
u_nNextLongUpdate.create(frameSize.height, frameSize.width, CV_8UC1);
u_nNextLongUpdate.setTo(Scalar::all(0));
}
else
#endif
{
// for each sample of 3 speed pixel models each pixel bg model we store ...
// values + flag (nchannels+1 values)
bgmodel.create( 1,(nN * 3) * (nchannels+1)* size,CV_8U);
bgmodel = Scalar::all(0);
//index through the three circular lists
aModelIndexShort.create(1,size,CV_8U);
aModelIndexMid.create(1,size,CV_8U);
aModelIndexLong.create(1,size,CV_8U);
//when to update next
nNextShortUpdate.create(1,size,CV_8U);
nNextMidUpdate.create(1,size,CV_8U);
nNextLongUpdate.create(1,size,CV_8U);
aModelIndexShort = Scalar::all(0);//random? //((m_nN)*rand())/(RAND_MAX+1);//0...m_nN-1
aModelIndexMid = Scalar::all(0);
aModelIndexLong = Scalar::all(0);
nNextShortUpdate = Scalar::all(0);
nNextMidUpdate = Scalar::all(0);
nNextLongUpdate = Scalar::all(0);
}
}
virtual int getHistory() const CV_OVERRIDE { return history; }
virtual void setHistory(int _nframes) CV_OVERRIDE { history = _nframes; }
virtual int getNSamples() const CV_OVERRIDE { return nN; }
virtual void setNSamples(int _nN) CV_OVERRIDE { nN = _nN; }//needs reinitialization!
virtual int getkNNSamples() const CV_OVERRIDE { return nkNN; }
virtual void setkNNSamples(int _nkNN) CV_OVERRIDE { nkNN = _nkNN; }
virtual double getDist2Threshold() const CV_OVERRIDE { return fTb; }
virtual void setDist2Threshold(double _dist2Threshold) CV_OVERRIDE { fTb = (float)_dist2Threshold; }
virtual bool getDetectShadows() const CV_OVERRIDE { return bShadowDetection; }
virtual void setDetectShadows(bool detectshadows) CV_OVERRIDE
{
if (bShadowDetection == detectshadows)
return;
bShadowDetection = detectshadows;
#ifdef HAVE_OPENCL
if (!kernel_apply.empty())
{
create_ocl_apply_kernel();
CV_Assert( !kernel_apply.empty() );
}
#endif
}
virtual int getShadowValue() const CV_OVERRIDE { return nShadowDetection; }
virtual void setShadowValue(int value) CV_OVERRIDE { nShadowDetection = (uchar)value; }
virtual double getShadowThreshold() const CV_OVERRIDE { return fTau; }
virtual void setShadowThreshold(double value) CV_OVERRIDE { fTau = (float)value; }
virtual void write(FileStorage& fs) const CV_OVERRIDE
{
writeFormat(fs);
fs << "name" << name_
<< "history" << history
<< "nsamples" << nN
<< "nKNN" << nkNN
<< "dist2Threshold" << fTb
<< "detectShadows" << (int)bShadowDetection
<< "shadowValue" << (int)nShadowDetection
<< "shadowThreshold" << fTau;
}
virtual void read(const FileNode& fn) CV_OVERRIDE
{
CV_Assert( (String)fn["name"] == name_ );
history = (int)fn["history"];
nN = (int)fn["nsamples"];
nkNN = (int)fn["nKNN"];
fTb = (float)fn["dist2Threshold"];
bShadowDetection = (int)fn["detectShadows"] != 0;
nShadowDetection = saturate_cast<uchar>((int)fn["shadowValue"]);
fTau = (float)fn["shadowThreshold"];
}
protected:
Size frameSize;
int frameType;
int nframes;
/////////////////////////
//very important parameters - things you will change
////////////////////////
int history;
//alpha=1/history - speed of update - if the time interval you want to average over is T
//set alpha=1/history. It is also useful at start to make T slowly increase
//from 1 until the desired T
float fTb;
//Tb - threshold on the squared distance from the sample used to decide if it is well described
//by the background model or not. A typical value could be 2 sigma
//and that is Tb=2*2*10*10 =400; where we take typical pixel level sigma=10
/////////////////////////
//less important parameters - things you might change but be careful
////////////////////////
int nN;//totlal number of samples
int nkNN;//number on NN for detcting background - default K=[0.1*nN]
//shadow detection parameters
bool bShadowDetection;//default 1 - do shadow detection
unsigned char nShadowDetection;//do shadow detection - insert this value as the detection result - 127 default value
float fTau;
// Tau - shadow threshold. The shadow is detected if the pixel is darker
//version of the background. Tau is a threshold on how much darker the shadow can be.
//Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
//See: Prati,Mikic,Trivedi,Cucchiara,"Detecting Moving Shadows...",IEEE PAMI,2003.
//model data
int nLongCounter;//circular counter
int nMidCounter;
int nShortCounter;
Mat bgmodel; // model data pixel values
Mat aModelIndexShort;// index into the models
Mat aModelIndexMid;
Mat aModelIndexLong;
Mat nNextShortUpdate;//random update points per model
Mat nNextMidUpdate;
Mat nNextLongUpdate;
#ifdef HAVE_OPENCL
mutable bool opencl_ON;
UMat u_flag;
UMat u_sample;
UMat u_aModelIndexShort;
UMat u_aModelIndexMid;
UMat u_aModelIndexLong;
UMat u_nNextShortUpdate;
UMat u_nNextMidUpdate;
UMat u_nNextLongUpdate;
mutable ocl::Kernel kernel_apply;
mutable ocl::Kernel kernel_getBg;
#endif
String name_;
#ifdef HAVE_OPENCL
bool ocl_getBackgroundImage(OutputArray backgroundImage) const;
bool ocl_apply(InputArray _image, OutputArray _fgmask, double learningRate=-1);
void create_ocl_apply_kernel();
#endif
};
CV_INLINE void
_cvUpdatePixelBackgroundNP(int x_idx, const uchar* data, int nchannels, int m_nN,
uchar* m_aModel,
uchar* m_nNextLongUpdate,
uchar* m_nNextMidUpdate,
uchar* m_nNextShortUpdate,
uchar* m_aModelIndexLong,
uchar* m_aModelIndexMid,
uchar* m_aModelIndexShort,
int m_nLongCounter,
int m_nMidCounter,
int m_nShortCounter,
uchar include
)
{
// hold the offset
int ndata=1+nchannels;
long offsetLong = ndata * (m_aModelIndexLong[x_idx] + m_nN * 2);
long offsetMid = ndata * (m_aModelIndexMid[x_idx] + m_nN * 1);
long offsetShort = ndata * (m_aModelIndexShort[x_idx]);
// Long update?
if (m_nNextLongUpdate[x_idx] == m_nLongCounter)
{
// add the oldest pixel from Mid to the list of values (for each color)
memcpy(&m_aModel[offsetLong],&m_aModel[offsetMid],ndata*sizeof(unsigned char));
// increase the index
m_aModelIndexLong[x_idx] = (m_aModelIndexLong[x_idx] >= (m_nN-1)) ? 0 : (m_aModelIndexLong[x_idx] + 1);
};
// Mid update?
if (m_nNextMidUpdate[x_idx] == m_nMidCounter)
{
// add this pixel to the list of values (for each color)
memcpy(&m_aModel[offsetMid],&m_aModel[offsetShort],ndata*sizeof(unsigned char));
// increase the index
m_aModelIndexMid[x_idx] = (m_aModelIndexMid[x_idx] >= (m_nN-1)) ? 0 : (m_aModelIndexMid[x_idx] + 1);
};
// Short update?
if (m_nNextShortUpdate[x_idx] == m_nShortCounter)
{
// add this pixel to the list of values (for each color)
memcpy(&m_aModel[offsetShort],data,nchannels*sizeof(unsigned char));
//set the include flag
m_aModel[offsetShort+nchannels]=include;
// increase the index
m_aModelIndexShort[x_idx] = (m_aModelIndexShort[x_idx] >= (m_nN-1)) ? 0 : (m_aModelIndexShort[x_idx] + 1);
};
}
CV_INLINE int
_cvCheckPixelBackgroundNP(const uchar* data, int nchannels,
int m_nN,
uchar* m_aModel,
float m_fTb,
int m_nkNN,
float tau,
bool m_bShadowDetection,
uchar& include)
{
int Pbf = 0; // the total probability that this pixel is background
int Pb = 0; //background model probability
float dData[CV_CN_MAX];
//uchar& include=data[nchannels];
include=0;//do we include this pixel into background model?
int ndata=nchannels+1;
// now increase the probability for each pixel
for (int n = 0; n < m_nN*3; n++)
{
uchar* mean_m = &m_aModel[n*ndata];
//calculate difference and distance
float dist2;
if( nchannels == 3 )
{
dData[0] = (float)mean_m[0] - data[0];
dData[1] = (float)mean_m[1] - data[1];
dData[2] = (float)mean_m[2] - data[2];
dist2 = dData[0]*dData[0] + dData[1]*dData[1] + dData[2]*dData[2];
}
else
{
dist2 = 0.f;
for( int c = 0; c < nchannels; c++ )
{
dData[c] = (float)mean_m[c] - data[c];
dist2 += dData[c]*dData[c];
}
}
if (dist2<m_fTb)
{
Pbf++;//all
//background only
//if(m_aModel[subPosPixel + nchannels])//indicator
if(mean_m[nchannels])//indicator
{
Pb++;
if (Pb >= m_nkNN)//Tb
{
include=1;//include
return 1;//background ->exit
};
}
};
};
//include?
if (Pbf>=m_nkNN)//m_nTbf)
{
include=1;
}
int Ps = 0; // the total probability that this pixel is background shadow
// Detected as moving object, perform shadow detection
if (m_bShadowDetection)
{
for (int n = 0; n < m_nN*3; n++)
{
//long subPosPixel = posPixel + n*ndata;
uchar* mean_m = &m_aModel[n*ndata];
if(mean_m[nchannels])//check only background
{
float numerator = 0.0f;
float denominator = 0.0f;
for( int c = 0; c < nchannels; c++ )
{
numerator += (float)data[c] * mean_m[c];
denominator += (float)mean_m[c] * mean_m[c];
}
// no division by zero allowed
if( denominator == 0 )
return 0;
// if tau < a < 1 then also check the color distortion
if( numerator <= denominator && numerator >= tau*denominator )
{
float a = numerator / denominator;
float dist2a = 0.0f;
for( int c = 0; c < nchannels; c++ )
{
float dD= a*mean_m[c] - data[c];
dist2a += dD*dD;
}
if (dist2a<m_fTb*a*a)
{
Ps++;
if (Ps >= m_nkNN)//shadow
return 2;
};
};
};
};
}
return 0;
}
class KNNInvoker : public ParallelLoopBody
{
public:
KNNInvoker(const Mat& _src, Mat& _dst,
uchar* _bgmodel,
uchar* _nNextLongUpdate,
uchar* _nNextMidUpdate,
uchar* _nNextShortUpdate,
uchar* _aModelIndexLong,
uchar* _aModelIndexMid,
uchar* _aModelIndexShort,
int _nLongCounter,
int _nMidCounter,
int _nShortCounter,
int _nN,
float _fTb,
int _nkNN,
float _fTau,
bool _bShadowDetection,
uchar _nShadowDetection)
{
src = &_src;
dst = &_dst;
m_aModel0 = _bgmodel;
m_nNextLongUpdate0 = _nNextLongUpdate;
m_nNextMidUpdate0 = _nNextMidUpdate;
m_nNextShortUpdate0 = _nNextShortUpdate;
m_aModelIndexLong0 = _aModelIndexLong;
m_aModelIndexMid0 = _aModelIndexMid;
m_aModelIndexShort0 = _aModelIndexShort;
m_nLongCounter = _nLongCounter;
m_nMidCounter = _nMidCounter;
m_nShortCounter = _nShortCounter;
m_nN = _nN;
m_fTb = _fTb;
m_fTau = _fTau;
m_nkNN = _nkNN;
m_bShadowDetection = _bShadowDetection;
m_nShadowDetection = _nShadowDetection;
}
void operator()(const Range& range) const CV_OVERRIDE
{
int y0 = range.start, y1 = range.end;
int ncols = src->cols, nchannels = src->channels();
int ndata=nchannels+1;
for ( int y = y0; y < y1; y++ )
{
const uchar* data = src->ptr(y);
uchar* m_aModel = m_aModel0 + ncols*m_nN*3*ndata*y;
uchar* m_nNextLongUpdate = m_nNextLongUpdate0 + ncols*y;
uchar* m_nNextMidUpdate = m_nNextMidUpdate0 + ncols*y;
uchar* m_nNextShortUpdate = m_nNextShortUpdate0 + ncols*y;
uchar* m_aModelIndexLong = m_aModelIndexLong0 + ncols*y;
uchar* m_aModelIndexMid = m_aModelIndexMid0 + ncols*y;
uchar* m_aModelIndexShort = m_aModelIndexShort0 + ncols*y;
uchar* mask = dst->ptr(y);
for ( int x = 0; x < ncols; x++ )
{
//update model+ background subtract
uchar include=0;
int result= _cvCheckPixelBackgroundNP(data, nchannels,
m_nN, m_aModel, m_fTb,m_nkNN, m_fTau,m_bShadowDetection,include);
_cvUpdatePixelBackgroundNP(x,data,nchannels,
m_nN, m_aModel,
m_nNextLongUpdate,
m_nNextMidUpdate,
m_nNextShortUpdate,
m_aModelIndexLong,
m_aModelIndexMid,
m_aModelIndexShort,
m_nLongCounter,
m_nMidCounter,
m_nShortCounter,
include
);
switch (result)
{
case 0:
//foreground
mask[x] = 255;
break;
case 1:
//background
mask[x] = 0;
break;
case 2:
//shadow
mask[x] = m_nShadowDetection;
break;
}
data += nchannels;
m_aModel += m_nN*3*ndata;
}
}
}
const Mat* src;
Mat* dst;
uchar* m_aModel0;
uchar* m_nNextLongUpdate0;
uchar* m_nNextMidUpdate0;
uchar* m_nNextShortUpdate0;
uchar* m_aModelIndexLong0;
uchar* m_aModelIndexMid0;
uchar* m_aModelIndexShort0;
int m_nLongCounter;
int m_nMidCounter;
int m_nShortCounter;
int m_nN;
float m_fTb;
float m_fTau;
int m_nkNN;
bool m_bShadowDetection;
uchar m_nShadowDetection;
};
#ifdef HAVE_OPENCL
bool BackgroundSubtractorKNNImpl::ocl_apply(InputArray _image, OutputArray _fgmask, double learningRate)
{
bool needToInitialize = nframes == 0 || learningRate >= 1 || _image.size() != frameSize || _image.type() != frameType;
if( needToInitialize )
initialize(_image.size(), _image.type());
++nframes;
learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./std::min( 2*nframes, history );
CV_Assert(learningRate >= 0);
_fgmask.create(_image.size(), CV_8U);
UMat fgmask = _fgmask.getUMat();
UMat frame = _image.getUMat();
//recalculate update rates - in case alpha is changed
// calculate update parameters (using alpha)
int Kshort,Kmid,Klong;
//approximate exponential learning curve
Kshort=(int)(log(0.7)/log(1-learningRate))+1;//Kshort
Kmid=(int)(log(0.4)/log(1-learningRate))-Kshort+1;//Kmid
Klong=(int)(log(0.1)/log(1-learningRate))-Kshort-Kmid+1;//Klong
//refresh rates
int nShortUpdate = (Kshort/nN)+1;
int nMidUpdate = (Kmid/nN)+1;
int nLongUpdate = (Klong/nN)+1;
int idxArg = 0;
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::ReadOnly(frame));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadOnly(u_nNextLongUpdate));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadOnly(u_nNextMidUpdate));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadOnly(u_nNextShortUpdate));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_aModelIndexLong));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_aModelIndexMid));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_aModelIndexShort));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_flag));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_sample));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::WriteOnlyNoSize(fgmask));
idxArg = kernel_apply.set(idxArg, nLongCounter);
idxArg = kernel_apply.set(idxArg, nMidCounter);
idxArg = kernel_apply.set(idxArg, nShortCounter);
idxArg = kernel_apply.set(idxArg, fTb);
idxArg = kernel_apply.set(idxArg, nkNN);
idxArg = kernel_apply.set(idxArg, fTau);
if (bShadowDetection)
kernel_apply.set(idxArg, nShadowDetection);
size_t globalsize[2] = {(size_t)frame.cols, (size_t)frame.rows};
if(!kernel_apply.run(2, globalsize, NULL, true))
return false;
nShortCounter++;//0,1,...,nShortUpdate-1
nMidCounter++;
nLongCounter++;
if (nShortCounter >= nShortUpdate)
{
nShortCounter = 0;
randu(u_nNextShortUpdate, Scalar::all(0), Scalar::all(nShortUpdate));
}
if (nMidCounter >= nMidUpdate)
{
nMidCounter = 0;
randu(u_nNextMidUpdate, Scalar::all(0), Scalar::all(nMidUpdate));
}
if (nLongCounter >= nLongUpdate)
{
nLongCounter = 0;
randu(u_nNextLongUpdate, Scalar::all(0), Scalar::all(nLongUpdate));
}
return true;
}
bool BackgroundSubtractorKNNImpl::ocl_getBackgroundImage(OutputArray _backgroundImage) const
{
_backgroundImage.create(frameSize, frameType);
UMat dst = _backgroundImage.getUMat();
int idxArg = 0;
idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_flag));
idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_sample));
idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::WriteOnly(dst));
size_t globalsize[2] = {(size_t)dst.cols, (size_t)dst.rows};
return kernel_getBg.run(2, globalsize, NULL, false);
}
void BackgroundSubtractorKNNImpl::create_ocl_apply_kernel()
{
int nchannels = CV_MAT_CN(frameType);
String opts = format("-D CN=%d -D NSAMPLES=%d%s", nchannels, nN, bShadowDetection ? " -D SHADOW_DETECT" : "");
kernel_apply.create("knn_kernel", ocl::video::bgfg_knn_oclsrc, opts);
}
#endif
void BackgroundSubtractorKNNImpl::apply(InputArray _image, OutputArray _fgmask, double learningRate)
{
CV_INSTRUMENT_REGION();
#ifdef HAVE_OPENCL
if (opencl_ON)
{
#ifndef __APPLE__
CV_OCL_RUN(_fgmask.isUMat() && OCL_PERFORMANCE_CHECK(!ocl::Device::getDefault().isIntel() || _image.channels() == 1),
ocl_apply(_image, _fgmask, learningRate))
#else
CV_OCL_RUN(_fgmask.isUMat() && OCL_PERFORMANCE_CHECK(!ocl::Device::getDefault().isIntel()),
ocl_apply(_image, _fgmask, learningRate))
#endif
opencl_ON = false;
nframes = 0;
}
#endif
bool needToInitialize = nframes == 0 || learningRate >= 1 || _image.size() != frameSize || _image.type() != frameType;
if( needToInitialize )
initialize(_image.size(), _image.type());
Mat image = _image.getMat();
_fgmask.create( image.size(), CV_8U );
Mat fgmask = _fgmask.getMat();
++nframes;
learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./std::min( 2*nframes, history );
CV_Assert(learningRate >= 0);
//recalculate update rates - in case alpha is changed
// calculate update parameters (using alpha)
int Kshort,Kmid,Klong;
//approximate exponential learning curve
Kshort=(int)(log(0.7)/log(1-learningRate))+1;//Kshort
Kmid=(int)(log(0.4)/log(1-learningRate))-Kshort+1;//Kmid
Klong=(int)(log(0.1)/log(1-learningRate))-Kshort-Kmid+1;//Klong
//refresh rates
int nShortUpdate = (Kshort/nN)+1;
int nMidUpdate = (Kmid/nN)+1;
int nLongUpdate = (Klong/nN)+1;
parallel_for_(Range(0, image.rows),
KNNInvoker(image, fgmask,
bgmodel.ptr(),
nNextLongUpdate.ptr(),
nNextMidUpdate.ptr(),
nNextShortUpdate.ptr(),
aModelIndexLong.ptr(),
aModelIndexMid.ptr(),
aModelIndexShort.ptr(),
nLongCounter,
nMidCounter,
nShortCounter,
nN,
fTb,
nkNN,
fTau,
bShadowDetection,
nShadowDetection),
image.total()/(double)(1 << 16));
nShortCounter++;//0,1,...,nShortUpdate-1
nMidCounter++;
nLongCounter++;
if (nShortCounter >= nShortUpdate)
{
nShortCounter = 0;
randu(nNextShortUpdate, Scalar::all(0), Scalar::all(nShortUpdate));
}
if (nMidCounter >= nMidUpdate)
{
nMidCounter = 0;
randu(nNextMidUpdate, Scalar::all(0), Scalar::all(nMidUpdate));
}
if (nLongCounter >= nLongUpdate)
{
nLongCounter = 0;
randu(nNextLongUpdate, Scalar::all(0), Scalar::all(nLongUpdate));
}
}
void BackgroundSubtractorKNNImpl::getBackgroundImage(OutputArray backgroundImage) const
{
CV_INSTRUMENT_REGION();
#ifdef HAVE_OPENCL
if (opencl_ON)
{
CV_OCL_RUN(opencl_ON, ocl_getBackgroundImage(backgroundImage))
opencl_ON = false;
}
#endif
int nchannels = CV_MAT_CN(frameType);
//CV_Assert( nchannels == 3 );
Mat meanBackground(frameSize, CV_8UC3, Scalar::all(0));
int ndata=nchannels+1;
int modelstep=(ndata * nN * 3);
const uchar* pbgmodel=bgmodel.ptr(0);
for(int row=0; row<meanBackground.rows; row++)
{
for(int col=0; col<meanBackground.cols; col++)
{
for (int n = 0; n < nN*3; n++)
{
const uchar* mean_m = &pbgmodel[n*ndata];
if (mean_m[nchannels])
{
meanBackground.at<Vec3b>(row, col) = Vec3b(mean_m);
break;
}
}
pbgmodel=pbgmodel+modelstep;
}
}
switch(CV_MAT_CN(frameType))
{
case 1:
{
std::vector<Mat> channels;
split(meanBackground, channels);
channels[0].copyTo(backgroundImage);
break;
}
case 3:
{
meanBackground.copyTo(backgroundImage);
break;
}
default:
CV_Error(Error::StsUnsupportedFormat, "");
}
}
Ptr<BackgroundSubtractorKNN> createBackgroundSubtractorKNN(int _history, double _threshold2,
bool _bShadowDetection)
{
return makePtr<BackgroundSubtractorKNNImpl>(_history, (float)_threshold2, _bShadowDetection);
}
}
/* End of file. */

View File

@@ -0,0 +1,965 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*//Implementation of the Gaussian mixture model background subtraction from:
//
//"Improved adaptive Gaussian mixture model for background subtraction"
//Z.Zivkovic
//International Conference Pattern Recognition, UK, August, 2004
//http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
//The code is very fast and performs also shadow detection.
//Number of Gausssian components is adapted per pixel.
//
// and
//
//"Efficient Adaptive Density Estimapion per Image Pixel for the Task of Background Subtraction"
//Z.Zivkovic, F. van der Heijden
//Pattern Recognition Letters, vol. 27, no. 7, pages 773-780, 2006.
//
//The algorithm similar to the standard Stauffer&Grimson algorithm with
//additional selection of the number of the Gaussian components based on:
//
//"Recursive unsupervised learning of finite mixture models "
//Z.Zivkovic, F.van der Heijden
//IEEE Trans. on Pattern Analysis and Machine Intelligence, vol.26, no.5, pages 651-656, 2004
//http://www.zoranz.net/Publications/zivkovic2004PAMI.pdf
//
//
//Example usage with as cpp class
// BackgroundSubtractorMOG2 bg_model;
//For each new image the model is updates using:
// bg_model(img, fgmask);
//
//Example usage as part of the CvBGStatModel:
// CvBGStatModel* bg_model = cvCreateGaussianBGModel2( first_frame );
//
// //update for each frame
// cvUpdateBGStatModel( tmp_frame, bg_model );//segmentation result is in bg_model->foreground
//
// //release at the program termination
// cvReleaseBGStatModel( &bg_model );
//
//Author: Z.Zivkovic, www.zoranz.net
//Date: 7-April-2011, Version:1.0
///////////*/
#include "precomp.hpp"
#include "opencl_kernels_video.hpp"
namespace cv
{
/*
Interface of Gaussian mixture algorithm from:
"Improved adaptive Gaussian mixture model for background subtraction"
Z.Zivkovic
International Conference Pattern Recognition, UK, August, 2004
http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
Advantages:
-fast - number of Gausssian components is constantly adapted per pixel.
-performs also shadow detection (see bgfg_segm_test.cpp example)
*/
// default parameters of gaussian background detection algorithm
static const int defaultHistory2 = 500; // Learning rate; alpha = 1/defaultHistory2
static const float defaultVarThreshold2 = 4.0f*4.0f;
static const int defaultNMixtures2 = 5; // maximal number of Gaussians in mixture
static const float defaultBackgroundRatio2 = 0.9f; // threshold sum of weights for background test
static const float defaultVarThresholdGen2 = 3.0f*3.0f;
static const float defaultVarInit2 = 15.0f; // initial variance for new components
static const float defaultVarMax2 = 5*defaultVarInit2;
static const float defaultVarMin2 = 4.0f;
// additional parameters
static const float defaultfCT2 = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components
static const unsigned char defaultnShadowDetection2 = (unsigned char)127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
static const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation
class BackgroundSubtractorMOG2Impl CV_FINAL : public BackgroundSubtractorMOG2
{
public:
//! the default constructor
BackgroundSubtractorMOG2Impl()
{
frameSize = Size(0,0);
frameType = 0;
nframes = 0;
history = defaultHistory2;
varThreshold = defaultVarThreshold2;
bShadowDetection = 1;
nmixtures = defaultNMixtures2;
backgroundRatio = defaultBackgroundRatio2;
fVarInit = defaultVarInit2;
fVarMax = defaultVarMax2;
fVarMin = defaultVarMin2;
varThresholdGen = defaultVarThresholdGen2;
fCT = defaultfCT2;
nShadowDetection = defaultnShadowDetection2;
fTau = defaultfTau;
#ifdef HAVE_OPENCL
opencl_ON = true;
#endif
}
//! the full constructor that takes the length of the history,
// the number of gaussian mixtures, the background ratio parameter and the noise strength
BackgroundSubtractorMOG2Impl(int _history, float _varThreshold, bool _bShadowDetection=true)
{
frameSize = Size(0,0);
frameType = 0;
nframes = 0;
history = _history > 0 ? _history : defaultHistory2;
varThreshold = (_varThreshold>0)? _varThreshold : defaultVarThreshold2;
bShadowDetection = _bShadowDetection;
nmixtures = defaultNMixtures2;
backgroundRatio = defaultBackgroundRatio2;
fVarInit = defaultVarInit2;
fVarMax = defaultVarMax2;
fVarMin = defaultVarMin2;
varThresholdGen = defaultVarThresholdGen2;
fCT = defaultfCT2;
nShadowDetection = defaultnShadowDetection2;
fTau = defaultfTau;
name_ = "BackgroundSubtractor.MOG2";
#ifdef HAVE_OPENCL
opencl_ON = true;
#endif
}
//! the destructor
~BackgroundSubtractorMOG2Impl() CV_OVERRIDE {}
//! the update operator
void apply(InputArray image, OutputArray fgmask, double learningRate) CV_OVERRIDE;
//! computes a background image which are the mean of all background gaussians
virtual void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE;
//! re-initiaization method
void initialize(Size _frameSize, int _frameType)
{
frameSize = _frameSize;
frameType = _frameType;
nframes = 0;
int nchannels = CV_MAT_CN(frameType);
CV_Assert( nchannels <= CV_CN_MAX );
CV_Assert( nmixtures <= 255);
#ifdef HAVE_OPENCL
if (ocl::isOpenCLActivated() && opencl_ON)
{
create_ocl_apply_kernel();
bool isFloat = CV_MAKETYPE(CV_32F,nchannels) == frameType;
kernel_getBg.create("getBackgroundImage2_kernel", ocl::video::bgfg_mog2_oclsrc, format( "-D CN=%d -D FL=%d -D NMIXTURES=%d", nchannels, isFloat, nmixtures));
if (kernel_apply.empty() || kernel_getBg.empty())
opencl_ON = false;
}
else opencl_ON = false;
if (opencl_ON)
{
u_weight.create(frameSize.height * nmixtures, frameSize.width, CV_32FC1);
u_weight.setTo(Scalar::all(0));
u_variance.create(frameSize.height * nmixtures, frameSize.width, CV_32FC1);
u_variance.setTo(Scalar::all(0));
if (nchannels==3)
nchannels=4;
u_mean.create(frameSize.height * nmixtures, frameSize.width, CV_32FC(nchannels)); //4 channels
u_mean.setTo(Scalar::all(0));
//make the array for keeping track of the used modes per pixel - all zeros at start
u_bgmodelUsedModes.create(frameSize, CV_8UC1);
u_bgmodelUsedModes.setTo(cv::Scalar::all(0));
}
else
#endif
{
// for each gaussian mixture of each pixel bg model we store ...
// the mixture weight (w),
// the mean (nchannels values) and
// the covariance
bgmodel.create( 1, frameSize.height*frameSize.width*nmixtures*(2 + nchannels), CV_32F );
//make the array for keeping track of the used modes per pixel - all zeros at start
bgmodelUsedModes.create(frameSize,CV_8U);
bgmodelUsedModes = Scalar::all(0);
}
}
virtual int getHistory() const CV_OVERRIDE { return history; }
virtual void setHistory(int _nframes) CV_OVERRIDE { history = _nframes; }
virtual int getNMixtures() const CV_OVERRIDE { return nmixtures; }
virtual void setNMixtures(int nmix) CV_OVERRIDE { nmixtures = nmix; }
virtual double getBackgroundRatio() const CV_OVERRIDE { return backgroundRatio; }
virtual void setBackgroundRatio(double _backgroundRatio) CV_OVERRIDE { backgroundRatio = (float)_backgroundRatio; }
virtual double getVarThreshold() const CV_OVERRIDE { return varThreshold; }
virtual void setVarThreshold(double _varThreshold) CV_OVERRIDE { varThreshold = _varThreshold; }
virtual double getVarThresholdGen() const CV_OVERRIDE { return varThresholdGen; }
virtual void setVarThresholdGen(double _varThresholdGen) CV_OVERRIDE { varThresholdGen = (float)_varThresholdGen; }
virtual double getVarInit() const CV_OVERRIDE { return fVarInit; }
virtual void setVarInit(double varInit) CV_OVERRIDE { fVarInit = (float)varInit; }
virtual double getVarMin() const CV_OVERRIDE { return fVarMin; }
virtual void setVarMin(double varMin) CV_OVERRIDE { fVarMin = (float)varMin; }
virtual double getVarMax() const CV_OVERRIDE { return fVarMax; }
virtual void setVarMax(double varMax) CV_OVERRIDE { fVarMax = (float)varMax; }
virtual double getComplexityReductionThreshold() const CV_OVERRIDE { return fCT; }
virtual void setComplexityReductionThreshold(double ct) CV_OVERRIDE { fCT = (float)ct; }
virtual bool getDetectShadows() const CV_OVERRIDE { return bShadowDetection; }
virtual void setDetectShadows(bool detectshadows) CV_OVERRIDE
{
if (bShadowDetection == detectshadows)
return;
bShadowDetection = detectshadows;
#ifdef HAVE_OPENCL
if (!kernel_apply.empty())
{
create_ocl_apply_kernel();
CV_Assert( !kernel_apply.empty() );
}
#endif
}
virtual int getShadowValue() const CV_OVERRIDE { return nShadowDetection; }
virtual void setShadowValue(int value) CV_OVERRIDE { nShadowDetection = (uchar)value; }
virtual double getShadowThreshold() const CV_OVERRIDE { return fTau; }
virtual void setShadowThreshold(double value) CV_OVERRIDE { fTau = (float)value; }
virtual void write(FileStorage& fs) const CV_OVERRIDE
{
writeFormat(fs);
fs << "name" << name_
<< "history" << history
<< "nmixtures" << nmixtures
<< "backgroundRatio" << backgroundRatio
<< "varThreshold" << varThreshold
<< "varThresholdGen" << varThresholdGen
<< "varInit" << fVarInit
<< "varMin" << fVarMin
<< "varMax" << fVarMax
<< "complexityReductionThreshold" << fCT
<< "detectShadows" << (int)bShadowDetection
<< "shadowValue" << (int)nShadowDetection
<< "shadowThreshold" << fTau;
}
virtual void read(const FileNode& fn) CV_OVERRIDE
{
CV_Assert( (String)fn["name"] == name_ );
history = (int)fn["history"];
nmixtures = (int)fn["nmixtures"];
backgroundRatio = (float)fn["backgroundRatio"];
varThreshold = (double)fn["varThreshold"];
varThresholdGen = (float)fn["varThresholdGen"];
fVarInit = (float)fn["varInit"];
fVarMin = (float)fn["varMin"];
fVarMax = (float)fn["varMax"];
fCT = (float)fn["complexityReductionThreshold"];
bShadowDetection = (int)fn["detectShadows"] != 0;
nShadowDetection = saturate_cast<uchar>((int)fn["shadowValue"]);
fTau = (float)fn["shadowThreshold"];
}
protected:
Size frameSize;
int frameType;
Mat bgmodel;
Mat bgmodelUsedModes;//keep track of number of modes per pixel
#ifdef HAVE_OPENCL
//for OCL
mutable bool opencl_ON;
UMat u_weight;
UMat u_variance;
UMat u_mean;
UMat u_bgmodelUsedModes;
mutable ocl::Kernel kernel_apply;
mutable ocl::Kernel kernel_getBg;
#endif
int nframes;
int history;
int nmixtures;
//! here it is the maximum allowed number of mixture components.
//! Actual number is determined dynamically per pixel
double varThreshold;
// threshold on the squared Mahalanobis distance to decide if it is well described
// by the background model or not. Related to Cthr from the paper.
// This does not influence the update of the background. A typical value could be 4 sigma
// and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
/////////////////////////
// less important parameters - things you might change but be careful
////////////////////////
float backgroundRatio;
// corresponds to fTB=1-cf from the paper
// TB - threshold when the component becomes significant enough to be included into
// the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.
// For alpha=0.001 it means that the mode should exist for approximately 105 frames before
// it is considered foreground
// float noiseSigma;
float varThresholdGen;
//correspondts to Tg - threshold on the squared Mahalan. dist. to decide
//when a sample is close to the existing components. If it is not close
//to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
//Smaller Tg leads to more generated components and higher Tg might make
//lead to small number of components but they can grow too large
float fVarInit;
float fVarMin;
float fVarMax;
//initial variance for the newly generated components.
//It will will influence the speed of adaptation. A good guess should be made.
//A simple way is to estimate the typical standard deviation from the images.
//I used here 10 as a reasonable value
// min and max can be used to further control the variance
float fCT;//CT - complexity reduction prior
//this is related to the number of samples needed to accept that a component
//actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
//the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
//shadow detection parameters
bool bShadowDetection;//default 1 - do shadow detection
unsigned char nShadowDetection;//do shadow detection - insert this value as the detection result - 127 default value
float fTau;
// Tau - shadow threshold. The shadow is detected if the pixel is darker
//version of the background. Tau is a threshold on how much darker the shadow can be.
//Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
//See: Prati,Mikic,Trivedi,Cucchiara,"Detecting Moving Shadows...",IEEE PAMI,2003.
String name_;
template <typename T, int CN>
void getBackgroundImage_intern(OutputArray backgroundImage) const;
#ifdef HAVE_OPENCL
bool ocl_getBackgroundImage(OutputArray backgroundImage) const;
bool ocl_apply(InputArray _image, OutputArray _fgmask, double learningRate=-1);
void create_ocl_apply_kernel();
#endif
};
struct GaussBGStatModel2Params
{
//image info
int nWidth;
int nHeight;
int nND;//number of data dimensions (image channels)
bool bPostFiltering;//default 1 - do postfiltering - will make shadow detection results also give value 255
double minArea; // for postfiltering
bool bInit;//default 1, faster updates at start
/////////////////////////
//very important parameters - things you will change
////////////////////////
float fAlphaT;
//alpha - speed of update - if the time interval you want to average over is T
//set alpha=1/T. It is also useful at start to make T slowly increase
//from 1 until the desired T
float fTb;
//Tb - threshold on the squared Mahalan. dist. to decide if it is well described
//by the background model or not. Related to Cthr from the paper.
//This does not influence the update of the background. A typical value could be 4 sigma
//and that is Tb=4*4=16;
/////////////////////////
//less important parameters - things you might change but be careful
////////////////////////
float fTg;
//Tg - threshold on the squared Mahalan. dist. to decide
//when a sample is close to the existing components. If it is not close
//to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
//Smaller Tg leads to more generated components and higher Tg might make
//lead to small number of components but they can grow too large
float fTB;//1-cf from the paper
//TB - threshold when the component becomes significant enough to be included into
//the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.
//For alpha=0.001 it means that the mode should exist for approximately 105 frames before
//it is considered foreground
float fVarInit;
float fVarMax;
float fVarMin;
//initial standard deviation for the newly generated components.
//It will will influence the speed of adaptation. A good guess should be made.
//A simple way is to estimate the typical standard deviation from the images.
//I used here 10 as a reasonable value
float fCT;//CT - complexity reduction prior
//this is related to the number of samples needed to accept that a component
//actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
//the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
//even less important parameters
int nM;//max number of modes - const - 4 is usually enough
//shadow detection parameters
bool bShadowDetection;//default 1 - do shadow detection
unsigned char nShadowDetection;//do shadow detection - insert this value as the detection result
float fTau;
// Tau - shadow threshold. The shadow is detected if the pixel is darker
//version of the background. Tau is a threshold on how much darker the shadow can be.
//Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
//See: Prati,Mikic,Trivedi,Cucchiara,"Detecting Moving Shadows...",IEEE PAMI,2003.
};
struct GMM
{
float weight;
float variance;
};
// shadow detection performed per pixel
// should work for rgb data, could be useful for gray scale and depth data as well
// See: Prati,Mikic,Trivedi,Cucchiara,"Detecting Moving Shadows...",IEEE PAMI,2003.
CV_INLINE bool
detectShadowGMM(const float* data, int nchannels, int nmodes,
const GMM* gmm, const float* mean,
float Tb, float TB, float tau)
{
float tWeight = 0;
// check all the components marked as background:
for( int mode = 0; mode < nmodes; mode++, mean += nchannels )
{
GMM g = gmm[mode];
float numerator = 0.0f;
float denominator = 0.0f;
for( int c = 0; c < nchannels; c++ )
{
numerator += data[c] * mean[c];
denominator += mean[c] * mean[c];
}
// no division by zero allowed
if( denominator == 0 )
return false;
// if tau < a < 1 then also check the color distortion
if( numerator <= denominator && numerator >= tau*denominator )
{
float a = numerator / denominator;
float dist2a = 0.0f;
for( int c = 0; c < nchannels; c++ )
{
float dD= a*mean[c] - data[c];
dist2a += dD*dD;
}
if (dist2a < Tb*g.variance*a*a)
return true;
};
tWeight += g.weight;
if( tWeight > TB )
return false;
};
return false;
}
//update GMM - the base update function performed per pixel
//
//"Efficient Adaptive Density Estimapion per Image Pixel for the Task of Background Subtraction"
//Z.Zivkovic, F. van der Heijden
//Pattern Recognition Letters, vol. 27, no. 7, pages 773-780, 2006.
//
//The algorithm similar to the standard Stauffer&Grimson algorithm with
//additional selection of the number of the Gaussian components based on:
//
//"Recursive unsupervised learning of finite mixture models "
//Z.Zivkovic, F.van der Heijden
//IEEE Trans. on Pattern Analysis and Machine Intelligence, vol.26, no.5, pages 651-656, 2004
//http://www.zoranz.net/Publications/zivkovic2004PAMI.pdf
class MOG2Invoker : public ParallelLoopBody
{
public:
MOG2Invoker(const Mat& _src, Mat& _dst,
GMM* _gmm, float* _mean,
uchar* _modesUsed,
int _nmixtures, float _alphaT,
float _Tb, float _TB, float _Tg,
float _varInit, float _varMin, float _varMax,
float _prune, float _tau, bool _detectShadows,
uchar _shadowVal)
{
src = &_src;
dst = &_dst;
gmm0 = _gmm;
mean0 = _mean;
modesUsed0 = _modesUsed;
nmixtures = _nmixtures;
alphaT = _alphaT;
Tb = _Tb;
TB = _TB;
Tg = _Tg;
varInit = _varInit;
varMin = MIN(_varMin, _varMax);
varMax = MAX(_varMin, _varMax);
prune = _prune;
tau = _tau;
detectShadows = _detectShadows;
shadowVal = _shadowVal;
}
void operator()(const Range& range) const CV_OVERRIDE
{
int y0 = range.start, y1 = range.end;
int ncols = src->cols, nchannels = src->channels();
AutoBuffer<float> buf(src->cols*nchannels);
float alpha1 = 1.f - alphaT;
float dData[CV_CN_MAX];
for( int y = y0; y < y1; y++ )
{
const float* data = buf.data();
if( src->depth() != CV_32F )
src->row(y).convertTo(Mat(1, ncols, CV_32FC(nchannels), (void*)data), CV_32F);
else
data = src->ptr<float>(y);
float* mean = mean0 + ncols*nmixtures*nchannels*y;
GMM* gmm = gmm0 + ncols*nmixtures*y;
uchar* modesUsed = modesUsed0 + ncols*y;
uchar* mask = dst->ptr(y);
for( int x = 0; x < ncols; x++, data += nchannels, gmm += nmixtures, mean += nmixtures*nchannels )
{
//calculate distances to the modes (+ sort)
//here we need to go in descending order!!!
bool background = false;//return value -> true - the pixel classified as background
//internal:
bool fitsPDF = false;//if it remains zero a new GMM mode will be added
int nmodes = modesUsed[x];//current number of modes in GMM
float totalWeight = 0.f;
float* mean_m = mean;
//////
//go through all modes
for( int mode = 0; mode < nmodes; mode++, mean_m += nchannels )
{
float weight = alpha1*gmm[mode].weight + prune;//need only weight if fit is found
int swap_count = 0;
////
//fit not found yet
if( !fitsPDF )
{
//check if it belongs to some of the remaining modes
float var = gmm[mode].variance;
//calculate difference and distance
float dist2;
if( nchannels == 3 )
{
dData[0] = mean_m[0] - data[0];
dData[1] = mean_m[1] - data[1];
dData[2] = mean_m[2] - data[2];
dist2 = dData[0]*dData[0] + dData[1]*dData[1] + dData[2]*dData[2];
}
else
{
dist2 = 0.f;
for( int c = 0; c < nchannels; c++ )
{
dData[c] = mean_m[c] - data[c];
dist2 += dData[c]*dData[c];
}
}
//background? - Tb - usually larger than Tg
if( totalWeight < TB && dist2 < Tb*var )
background = true;
//check fit
if( dist2 < Tg*var )
{
/////
//belongs to the mode
fitsPDF = true;
//update distribution
//update weight
weight += alphaT;
float k = alphaT/weight;
//update mean
for( int c = 0; c < nchannels; c++ )
mean_m[c] -= k*dData[c];
//update variance
float varnew = var + k*(dist2-var);
//limit the variance
varnew = MAX(varnew, varMin);
varnew = MIN(varnew, varMax);
gmm[mode].variance = varnew;
//sort
//all other weights are at the same place and
//only the matched (iModes) is higher -> just find the new place for it
for( int i = mode; i > 0; i-- )
{
//check one up
if( weight < gmm[i-1].weight )
break;
swap_count++;
//swap one up
std::swap(gmm[i], gmm[i-1]);
for( int c = 0; c < nchannels; c++ )
std::swap(mean[i*nchannels + c], mean[(i-1)*nchannels + c]);
}
//belongs to the mode - bFitsPDF becomes 1
/////
}
}//!bFitsPDF)
//check prune
if( weight < -prune )
{
weight = 0.0;
nmodes--;
}
gmm[mode-swap_count].weight = weight;//update weight by the calculated value
totalWeight += weight;
}
//go through all modes
//////
// Renormalize weights. In the special case that the pixel does
// not agree with any modes, set weights to zero (a new mode will be added below).
float invWeight = 0.f;
if (std::abs(totalWeight) > FLT_EPSILON) {
invWeight = 1.f/totalWeight;
}
for( int mode = 0; mode < nmodes; mode++ )
{
gmm[mode].weight *= invWeight;
}
//make new mode if needed and exit
if( !fitsPDF && alphaT > 0.f )
{
// replace the weakest or add a new one
int mode = nmodes == nmixtures ? nmixtures-1 : nmodes++;
if (nmodes==1)
gmm[mode].weight = 1.f;
else
{
gmm[mode].weight = alphaT;
// renormalize all other weights
for( int i = 0; i < nmodes-1; i++ )
gmm[i].weight *= alpha1;
}
// init
for( int c = 0; c < nchannels; c++ )
mean[mode*nchannels + c] = data[c];
gmm[mode].variance = varInit;
//sort
//find the new place for it
for( int i = nmodes - 1; i > 0; i-- )
{
// check one up
if( alphaT < gmm[i-1].weight )
break;
// swap one up
std::swap(gmm[i], gmm[i-1]);
for( int c = 0; c < nchannels; c++ )
std::swap(mean[i*nchannels + c], mean[(i-1)*nchannels + c]);
}
}
//set the number of modes
modesUsed[x] = uchar(nmodes);
mask[x] = background ? 0 :
detectShadows && detectShadowGMM(data, nchannels, nmodes, gmm, mean, Tb, TB, tau) ?
shadowVal : 255;
}
}
}
const Mat* src;
Mat* dst;
GMM* gmm0;
float* mean0;
uchar* modesUsed0;
int nmixtures;
float alphaT, Tb, TB, Tg;
float varInit, varMin, varMax, prune, tau;
bool detectShadows;
uchar shadowVal;
};
#ifdef HAVE_OPENCL
bool BackgroundSubtractorMOG2Impl::ocl_apply(InputArray _image, OutputArray _fgmask, double learningRate)
{
bool needToInitialize = nframes == 0 || learningRate >= 1 || _image.size() != frameSize || _image.type() != frameType;
if( needToInitialize )
initialize(_image.size(), _image.type());
++nframes;
learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./std::min( 2*nframes, history );
CV_Assert(learningRate >= 0);
_fgmask.create(_image.size(), CV_8U);
UMat fgmask = _fgmask.getUMat();
const double alpha1 = 1.0f - learningRate;
UMat frame = _image.getUMat();
float varMax = MAX(fVarMin, fVarMax);
float varMin = MIN(fVarMin, fVarMax);
int idxArg = 0;
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::ReadOnly(frame));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_bgmodelUsedModes));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_weight));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_mean));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_variance));
idxArg = kernel_apply.set(idxArg, ocl::KernelArg::WriteOnlyNoSize(fgmask));
idxArg = kernel_apply.set(idxArg, (float)learningRate); //alphaT
idxArg = kernel_apply.set(idxArg, (float)alpha1);
idxArg = kernel_apply.set(idxArg, (float)(-learningRate*fCT)); //prune
idxArg = kernel_apply.set(idxArg, (float)varThreshold); //c_Tb
idxArg = kernel_apply.set(idxArg, backgroundRatio); //c_TB
idxArg = kernel_apply.set(idxArg, varThresholdGen); //c_Tg
idxArg = kernel_apply.set(idxArg, varMin);
idxArg = kernel_apply.set(idxArg, varMax);
idxArg = kernel_apply.set(idxArg, fVarInit);
idxArg = kernel_apply.set(idxArg, fTau);
if (bShadowDetection)
kernel_apply.set(idxArg, nShadowDetection);
size_t globalsize[] = {(size_t)frame.cols, (size_t)frame.rows, 1};
return kernel_apply.run(2, globalsize, NULL, true);
}
bool BackgroundSubtractorMOG2Impl::ocl_getBackgroundImage(OutputArray _backgroundImage) const
{
_backgroundImage.create(frameSize, frameType);
UMat dst = _backgroundImage.getUMat();
int idxArg = 0;
idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_bgmodelUsedModes));
idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_weight));
idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_mean));
idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::WriteOnly(dst));
kernel_getBg.set(idxArg, backgroundRatio);
size_t globalsize[2] = {(size_t)u_bgmodelUsedModes.cols, (size_t)u_bgmodelUsedModes.rows};
return kernel_getBg.run(2, globalsize, NULL, false);
}
void BackgroundSubtractorMOG2Impl::create_ocl_apply_kernel()
{
int nchannels = CV_MAT_CN(frameType);
bool isFloat = CV_MAKETYPE(CV_32F,nchannels) == frameType;
String opts = format("-D CN=%d -D FL=%d -D NMIXTURES=%d%s", nchannels, isFloat, nmixtures, bShadowDetection ? " -D SHADOW_DETECT" : "");
kernel_apply.create("mog2_kernel", ocl::video::bgfg_mog2_oclsrc, opts);
}
#endif
void BackgroundSubtractorMOG2Impl::apply(InputArray _image, OutputArray _fgmask, double learningRate)
{
CV_INSTRUMENT_REGION();
#ifdef HAVE_OPENCL
if (opencl_ON)
{
CV_OCL_RUN(_fgmask.isUMat(), ocl_apply(_image, _fgmask, learningRate))
opencl_ON = false;
nframes = 0;
}
#endif
bool needToInitialize = nframes == 0 || learningRate >= 1 || _image.size() != frameSize || _image.type() != frameType;
if( needToInitialize )
initialize(_image.size(), _image.type());
Mat image = _image.getMat();
_fgmask.create( image.size(), CV_8U );
Mat fgmask = _fgmask.getMat();
++nframes;
learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./std::min( 2*nframes, history );
CV_Assert(learningRate >= 0);
parallel_for_(Range(0, image.rows),
MOG2Invoker(image, fgmask,
bgmodel.ptr<GMM>(),
(float*)(bgmodel.ptr() + sizeof(GMM)*nmixtures*image.rows*image.cols),
bgmodelUsedModes.ptr(), nmixtures, (float)learningRate,
(float)varThreshold,
backgroundRatio, varThresholdGen,
fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau,
bShadowDetection, nShadowDetection),
image.total()/(double)(1 << 16));
}
template <typename T, int CN>
void BackgroundSubtractorMOG2Impl::getBackgroundImage_intern(OutputArray backgroundImage) const
{
CV_INSTRUMENT_REGION();
Mat meanBackground(frameSize, frameType, Scalar::all(0));
int firstGaussianIdx = 0;
const GMM* gmm = bgmodel.ptr<GMM>();
const float* mean = reinterpret_cast<const float*>(gmm + frameSize.width*frameSize.height*nmixtures);
Vec<float,CN> meanVal(0.f);
for(int row=0; row<meanBackground.rows; row++)
{
for(int col=0; col<meanBackground.cols; col++)
{
int nmodes = bgmodelUsedModes.at<uchar>(row, col);
float totalWeight = 0.f;
for(int gaussianIdx = firstGaussianIdx; gaussianIdx < firstGaussianIdx + nmodes; gaussianIdx++)
{
GMM gaussian = gmm[gaussianIdx];
size_t meanPosition = gaussianIdx*CN;
for(int chn = 0; chn < CN; chn++)
{
meanVal(chn) += gaussian.weight * mean[meanPosition + chn];
}
totalWeight += gaussian.weight;
if(totalWeight > backgroundRatio)
break;
}
float invWeight = 0.f;
if (std::abs(totalWeight) > FLT_EPSILON) {
invWeight = 1.f/totalWeight;
}
meanBackground.at<Vec<T,CN> >(row, col) = Vec<T,CN>(meanVal * invWeight);
meanVal = 0.f;
firstGaussianIdx += nmixtures;
}
}
meanBackground.copyTo(backgroundImage);
}
void BackgroundSubtractorMOG2Impl::getBackgroundImage(OutputArray backgroundImage) const
{
CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_32FC1 || frameType == CV_32FC3);
#ifdef HAVE_OPENCL
if (opencl_ON)
{
CV_OCL_RUN(opencl_ON, ocl_getBackgroundImage(backgroundImage))
opencl_ON = false;
}
#endif
switch(frameType)
{
case CV_8UC1:
getBackgroundImage_intern<uchar,1>(backgroundImage);
break;
case CV_8UC3:
getBackgroundImage_intern<uchar,3>(backgroundImage);
break;
case CV_32FC1:
getBackgroundImage_intern<float,1>(backgroundImage);
break;
case CV_32FC3:
getBackgroundImage_intern<float,3>(backgroundImage);
break;
}
}
Ptr<BackgroundSubtractorMOG2> createBackgroundSubtractorMOG2(int _history, double _varThreshold,
bool _bShadowDetection)
{
return makePtr<BackgroundSubtractorMOG2Impl>(_history, (float)_varThreshold, _bShadowDetection);
}
}
/* End of file. */

View File

@@ -0,0 +1,220 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
int cv::meanShift( InputArray _probImage, Rect& window, TermCriteria criteria )
{
CV_INSTRUMENT_REGION();
Size size;
int cn;
Mat mat;
UMat umat;
bool isUMat = _probImage.isUMat();
if (isUMat)
umat = _probImage.getUMat(), cn = umat.channels(), size = umat.size();
else
mat = _probImage.getMat(), cn = mat.channels(), size = mat.size();
Rect cur_rect = window;
CV_Assert( cn == 1 );
if( window.height <= 0 || window.width <= 0 )
CV_Error( Error::StsBadArg, "Input window has non-positive sizes" );
window = window & Rect(0, 0, size.width, size.height);
double eps = (criteria.type & TermCriteria::EPS) ? std::max(criteria.epsilon, 0.) : 1.;
eps = cvRound(eps*eps);
int i, niters = (criteria.type & TermCriteria::MAX_ITER) ? std::max(criteria.maxCount, 1) : 100;
for( i = 0; i < niters; i++ )
{
cur_rect = cur_rect & Rect(0, 0, size.width, size.height);
if( cur_rect == Rect() )
{
cur_rect.x = size.width/2;
cur_rect.y = size.height/2;
}
cur_rect.width = std::max(cur_rect.width, 1);
cur_rect.height = std::max(cur_rect.height, 1);
Moments m = isUMat ? moments(umat(cur_rect)) : moments(mat(cur_rect));
// Calculating center of mass
if( fabs(m.m00) < DBL_EPSILON )
break;
int dx = cvRound( m.m10/m.m00 - window.width*0.5 );
int dy = cvRound( m.m01/m.m00 - window.height*0.5 );
int nx = std::min(std::max(cur_rect.x + dx, 0), size.width - cur_rect.width);
int ny = std::min(std::max(cur_rect.y + dy, 0), size.height - cur_rect.height);
dx = nx - cur_rect.x;
dy = ny - cur_rect.y;
cur_rect.x = nx;
cur_rect.y = ny;
// Check for coverage centers mass & window
if( dx*dx + dy*dy < eps )
break;
}
window = cur_rect;
return i;
}
cv::RotatedRect cv::CamShift( InputArray _probImage, Rect& window,
TermCriteria criteria )
{
CV_INSTRUMENT_REGION();
const int TOLERANCE = 10;
Size size;
Mat mat;
UMat umat;
bool isUMat = _probImage.isUMat();
if (isUMat)
umat = _probImage.getUMat(), size = umat.size();
else
mat = _probImage.getMat(), size = mat.size();
meanShift( _probImage, window, criteria );
window.x -= TOLERANCE;
if( window.x < 0 )
window.x = 0;
window.y -= TOLERANCE;
if( window.y < 0 )
window.y = 0;
window.width += 2 * TOLERANCE;
if( window.x + window.width > size.width )
window.width = size.width - window.x;
window.height += 2 * TOLERANCE;
if( window.y + window.height > size.height )
window.height = size.height - window.y;
// Calculating moments in new center mass
Moments m = isUMat ? moments(umat(window)) : moments(mat(window));
double m00 = m.m00, m10 = m.m10, m01 = m.m01;
double mu11 = m.mu11, mu20 = m.mu20, mu02 = m.mu02;
if( fabs(m00) < DBL_EPSILON )
return RotatedRect();
double inv_m00 = 1. / m00;
int xc = cvRound( m10 * inv_m00 + window.x );
int yc = cvRound( m01 * inv_m00 + window.y );
double a = mu20 * inv_m00, b = mu11 * inv_m00, c = mu02 * inv_m00;
// Calculating width & height
double square = std::sqrt( 4 * b * b + (a - c) * (a - c) );
// Calculating orientation
double theta = atan2( 2 * b, a - c + square );
// Calculating width & length of figure
double cs = cos( theta );
double sn = sin( theta );
double rotate_a = cs * cs * mu20 + 2 * cs * sn * mu11 + sn * sn * mu02;
double rotate_c = sn * sn * mu20 - 2 * cs * sn * mu11 + cs * cs * mu02;
rotate_a = std::max(0.0, rotate_a); // avoid negative result due calculation numeric errors
rotate_c = std::max(0.0, rotate_c); // avoid negative result due calculation numeric errors
double length = std::sqrt( rotate_a * inv_m00 ) * 4;
double width = std::sqrt( rotate_c * inv_m00 ) * 4;
// In case, when tetta is 0 or 1.57... the Length & Width may be exchanged
if( length < width )
{
std::swap( length, width );
std::swap( cs, sn );
theta = CV_PI*0.5 - theta;
}
// Saving results
int _xc = cvRound( xc );
int _yc = cvRound( yc );
int t0 = cvRound( fabs( length * cs ));
int t1 = cvRound( fabs( width * sn ));
t0 = MAX( t0, t1 ) + 2;
window.width = MIN( t0, (size.width - _xc) * 2 );
t0 = cvRound( fabs( length * sn ));
t1 = cvRound( fabs( width * cs ));
t0 = MAX( t0, t1 ) + 2;
window.height = MIN( t0, (size.height - _yc) * 2 );
window.x = MAX( 0, _xc - window.width / 2 );
window.y = MAX( 0, _yc - window.height / 2 );
window.width = MIN( size.width - window.x, window.width );
window.height = MIN( size.height - window.y, window.height );
RotatedRect box;
box.size.height = (float)length;
box.size.width = (float)width;
box.angle = (float)((CV_PI*0.5+theta)*180./CV_PI);
while(box.angle < 0)
box.angle += 360;
while(box.angle >= 360)
box.angle -= 360;
if(box.angle >= 180)
box.angle -= 180;
box.center = Point2f( window.x + window.width*0.5f, window.y + window.height*0.5f);
return box;
}
/* End of file. */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,601 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
/****************************************************************************************\
* Image Alignment (ECC algorithm) *
\****************************************************************************************/
using namespace cv;
static void image_jacobian_homo_ECC(const Mat& src1, const Mat& src2,
const Mat& src3, const Mat& src4,
const Mat& src5, Mat& dst)
{
CV_Assert(src1.size() == src2.size());
CV_Assert(src1.size() == src3.size());
CV_Assert(src1.size() == src4.size());
CV_Assert( src1.rows == dst.rows);
CV_Assert(dst.cols == (src1.cols*8));
CV_Assert(dst.type() == CV_32FC1);
CV_Assert(src5.isContinuous());
const float* hptr = src5.ptr<float>(0);
const float h0_ = hptr[0];
const float h1_ = hptr[3];
const float h2_ = hptr[6];
const float h3_ = hptr[1];
const float h4_ = hptr[4];
const float h5_ = hptr[7];
const float h6_ = hptr[2];
const float h7_ = hptr[5];
const int w = src1.cols;
//create denominator for all points as a block
Mat den_ = src3*h2_ + src4*h5_ + 1.0;//check the time of this! otherwise use addWeighted
//create projected points
Mat hatX_ = -src3*h0_ - src4*h3_ - h6_;
divide(hatX_, den_, hatX_);
Mat hatY_ = -src3*h1_ - src4*h4_ - h7_;
divide(hatY_, den_, hatY_);
//instead of dividing each block with den,
//just pre-divide the block of gradients (it's more efficient)
Mat src1Divided_;
Mat src2Divided_;
divide(src1, den_, src1Divided_);
divide(src2, den_, src2Divided_);
//compute Jacobian blocks (8 blocks)
dst.colRange(0, w) = src1Divided_.mul(src3);//1
dst.colRange(w,2*w) = src2Divided_.mul(src3);//2
Mat temp_ = (hatX_.mul(src1Divided_)+hatY_.mul(src2Divided_));
dst.colRange(2*w,3*w) = temp_.mul(src3);//3
hatX_.release();
hatY_.release();
dst.colRange(3*w, 4*w) = src1Divided_.mul(src4);//4
dst.colRange(4*w, 5*w) = src2Divided_.mul(src4);//5
dst.colRange(5*w, 6*w) = temp_.mul(src4);//6
src1Divided_.copyTo(dst.colRange(6*w, 7*w));//7
src2Divided_.copyTo(dst.colRange(7*w, 8*w));//8
}
static void image_jacobian_euclidean_ECC(const Mat& src1, const Mat& src2,
const Mat& src3, const Mat& src4,
const Mat& src5, Mat& dst)
{
CV_Assert( src1.size()==src2.size());
CV_Assert( src1.size()==src3.size());
CV_Assert( src1.size()==src4.size());
CV_Assert( src1.rows == dst.rows);
CV_Assert(dst.cols == (src1.cols*3));
CV_Assert(dst.type() == CV_32FC1);
CV_Assert(src5.isContinuous());
const float* hptr = src5.ptr<float>(0);
const float h0 = hptr[0];//cos(theta)
const float h1 = hptr[3];//sin(theta)
const int w = src1.cols;
//create -sin(theta)*X -cos(theta)*Y for all points as a block -> hatX
Mat hatX = -(src3*h1) - (src4*h0);
//create cos(theta)*X -sin(theta)*Y for all points as a block -> hatY
Mat hatY = (src3*h0) - (src4*h1);
//compute Jacobian blocks (3 blocks)
dst.colRange(0, w) = (src1.mul(hatX))+(src2.mul(hatY));//1
src1.copyTo(dst.colRange(w, 2*w));//2
src2.copyTo(dst.colRange(2*w, 3*w));//3
}
static void image_jacobian_affine_ECC(const Mat& src1, const Mat& src2,
const Mat& src3, const Mat& src4,
Mat& dst)
{
CV_Assert(src1.size() == src2.size());
CV_Assert(src1.size() == src3.size());
CV_Assert(src1.size() == src4.size());
CV_Assert(src1.rows == dst.rows);
CV_Assert(dst.cols == (6*src1.cols));
CV_Assert(dst.type() == CV_32FC1);
const int w = src1.cols;
//compute Jacobian blocks (6 blocks)
dst.colRange(0,w) = src1.mul(src3);//1
dst.colRange(w,2*w) = src2.mul(src3);//2
dst.colRange(2*w,3*w) = src1.mul(src4);//3
dst.colRange(3*w,4*w) = src2.mul(src4);//4
src1.copyTo(dst.colRange(4*w,5*w));//5
src2.copyTo(dst.colRange(5*w,6*w));//6
}
static void image_jacobian_translation_ECC(const Mat& src1, const Mat& src2, Mat& dst)
{
CV_Assert( src1.size()==src2.size());
CV_Assert( src1.rows == dst.rows);
CV_Assert(dst.cols == (src1.cols*2));
CV_Assert(dst.type() == CV_32FC1);
const int w = src1.cols;
//compute Jacobian blocks (2 blocks)
src1.copyTo(dst.colRange(0, w));
src2.copyTo(dst.colRange(w, 2*w));
}
static void project_onto_jacobian_ECC(const Mat& src1, const Mat& src2, Mat& dst)
{
/* this functions is used for two types of projections. If src1.cols ==src.cols
it does a blockwise multiplication (like in the outer product of vectors)
of the blocks in matrices src1 and src2 and dst
has size (number_of_blcks x number_of_blocks), otherwise dst is a vector of size
(number_of_blocks x 1) since src2 is "multiplied"(dot) with each block of src1.
The number_of_blocks is equal to the number of parameters we are lloking for
(i.e. rtanslation:2, euclidean: 3, affine: 6, homography: 8)
*/
CV_Assert(src1.rows == src2.rows);
CV_Assert((src1.cols % src2.cols) == 0);
int w;
float* dstPtr = dst.ptr<float>(0);
if (src1.cols !=src2.cols){//dst.cols==1
w = src2.cols;
for (int i=0; i<dst.rows; i++){
dstPtr[i] = (float) src2.dot(src1.colRange(i*w,(i+1)*w));
}
}
else {
CV_Assert(dst.cols == dst.rows); //dst is square (and symmetric)
w = src2.cols/dst.cols;
Mat mat;
for (int i=0; i<dst.rows; i++){
mat = Mat(src1.colRange(i*w, (i+1)*w));
dstPtr[i*(dst.rows+1)] = (float) pow(norm(mat),2); //diagonal elements
for (int j=i+1; j<dst.cols; j++){ //j starts from i+1
dstPtr[i*dst.cols+j] = (float) mat.dot(src2.colRange(j*w, (j+1)*w));
dstPtr[j*dst.cols+i] = dstPtr[i*dst.cols+j]; //due to symmetry
}
}
}
}
static void update_warping_matrix_ECC (Mat& map_matrix, const Mat& update, const int motionType)
{
CV_Assert (map_matrix.type() == CV_32FC1);
CV_Assert (update.type() == CV_32FC1);
CV_Assert (motionType == MOTION_TRANSLATION || motionType == MOTION_EUCLIDEAN ||
motionType == MOTION_AFFINE || motionType == MOTION_HOMOGRAPHY);
if (motionType == MOTION_HOMOGRAPHY)
CV_Assert (map_matrix.rows == 3 && update.rows == 8);
else if (motionType == MOTION_AFFINE)
CV_Assert(map_matrix.rows == 2 && update.rows == 6);
else if (motionType == MOTION_EUCLIDEAN)
CV_Assert (map_matrix.rows == 2 && update.rows == 3);
else
CV_Assert (map_matrix.rows == 2 && update.rows == 2);
CV_Assert (update.cols == 1);
CV_Assert( map_matrix.isContinuous());
CV_Assert( update.isContinuous() );
float* mapPtr = map_matrix.ptr<float>(0);
const float* updatePtr = update.ptr<float>(0);
if (motionType == MOTION_TRANSLATION){
mapPtr[2] += updatePtr[0];
mapPtr[5] += updatePtr[1];
}
if (motionType == MOTION_AFFINE) {
mapPtr[0] += updatePtr[0];
mapPtr[3] += updatePtr[1];
mapPtr[1] += updatePtr[2];
mapPtr[4] += updatePtr[3];
mapPtr[2] += updatePtr[4];
mapPtr[5] += updatePtr[5];
}
if (motionType == MOTION_HOMOGRAPHY) {
mapPtr[0] += updatePtr[0];
mapPtr[3] += updatePtr[1];
mapPtr[6] += updatePtr[2];
mapPtr[1] += updatePtr[3];
mapPtr[4] += updatePtr[4];
mapPtr[7] += updatePtr[5];
mapPtr[2] += updatePtr[6];
mapPtr[5] += updatePtr[7];
}
if (motionType == MOTION_EUCLIDEAN) {
double new_theta = updatePtr[0];
new_theta += asin(mapPtr[3]);
mapPtr[2] += updatePtr[1];
mapPtr[5] += updatePtr[2];
mapPtr[0] = mapPtr[4] = (float) cos(new_theta);
mapPtr[3] = (float) sin(new_theta);
mapPtr[1] = -mapPtr[3];
}
}
/** Function that computes enhanced corelation coefficient from Georgios et.al. 2008
* See https://github.com/opencv/opencv/issues/12432
*/
double cv::computeECC(InputArray templateImage, InputArray inputImage, InputArray inputMask)
{
CV_Assert(!templateImage.empty());
CV_Assert(!inputImage.empty());
if( ! (templateImage.type()==inputImage.type()))
CV_Error( Error::StsUnmatchedFormats, "Both input images must have the same data type" );
Scalar meanTemplate, sdTemplate;
int active_pixels = inputMask.empty() ? templateImage.size().area() : countNonZero(inputMask);
meanStdDev(templateImage, meanTemplate, sdTemplate, inputMask);
Mat templateImage_zeromean = Mat::zeros(templateImage.size(), templateImage.type());
subtract(templateImage, meanTemplate, templateImage_zeromean, inputMask);
double templateImagenorm = std::sqrt(active_pixels*sdTemplate.val[0]*sdTemplate.val[0]);
Scalar meanInput, sdInput;
Mat inputImage_zeromean = Mat::zeros(inputImage.size(), inputImage.type());
meanStdDev(inputImage, meanInput, sdInput, inputMask);
subtract(inputImage, meanInput, inputImage_zeromean, inputMask);
double inputImagenorm = std::sqrt(active_pixels*sdInput.val[0]*sdInput.val[0]);
return templateImage_zeromean.dot(inputImage_zeromean)/(templateImagenorm*inputImagenorm);
}
double cv::findTransformECC(InputArray templateImage,
InputArray inputImage,
InputOutputArray warpMatrix,
int motionType,
TermCriteria criteria,
InputArray inputMask,
int gaussFiltSize)
{
Mat src = templateImage.getMat();//template image
Mat dst = inputImage.getMat(); //input image (to be warped)
Mat map = warpMatrix.getMat(); //warp (transformation)
CV_Assert(!src.empty());
CV_Assert(!dst.empty());
// If the user passed an un-initialized warpMatrix, initialize to identity
if(map.empty()) {
int rowCount = 2;
if(motionType == MOTION_HOMOGRAPHY)
rowCount = 3;
warpMatrix.create(rowCount, 3, CV_32FC1);
map = warpMatrix.getMat();
map = Mat::eye(rowCount, 3, CV_32F);
}
if( ! (src.type()==dst.type()))
CV_Error( Error::StsUnmatchedFormats, "Both input images must have the same data type" );
//accept only 1-channel images
if( src.type() != CV_8UC1 && src.type()!= CV_32FC1)
CV_Error( Error::StsUnsupportedFormat, "Images must have 8uC1 or 32fC1 type");
if( map.type() != CV_32FC1)
CV_Error( Error::StsUnsupportedFormat, "warpMatrix must be single-channel floating-point matrix");
CV_Assert (map.cols == 3);
CV_Assert (map.rows == 2 || map.rows ==3);
CV_Assert (motionType == MOTION_AFFINE || motionType == MOTION_HOMOGRAPHY ||
motionType == MOTION_EUCLIDEAN || motionType == MOTION_TRANSLATION);
if (motionType == MOTION_HOMOGRAPHY){
CV_Assert (map.rows ==3);
}
CV_Assert (criteria.type & TermCriteria::COUNT || criteria.type & TermCriteria::EPS);
const int numberOfIterations = (criteria.type & TermCriteria::COUNT) ? criteria.maxCount : 200;
const double termination_eps = (criteria.type & TermCriteria::EPS) ? criteria.epsilon : -1;
int paramTemp = 6;//default: affine
switch (motionType){
case MOTION_TRANSLATION:
paramTemp = 2;
break;
case MOTION_EUCLIDEAN:
paramTemp = 3;
break;
case MOTION_HOMOGRAPHY:
paramTemp = 8;
break;
}
const int numberOfParameters = paramTemp;
const int ws = src.cols;
const int hs = src.rows;
const int wd = dst.cols;
const int hd = dst.rows;
Mat Xcoord = Mat(1, ws, CV_32F);
Mat Ycoord = Mat(hs, 1, CV_32F);
Mat Xgrid = Mat(hs, ws, CV_32F);
Mat Ygrid = Mat(hs, ws, CV_32F);
float* XcoPtr = Xcoord.ptr<float>(0);
float* YcoPtr = Ycoord.ptr<float>(0);
int j;
for (j=0; j<ws; j++)
XcoPtr[j] = (float) j;
for (j=0; j<hs; j++)
YcoPtr[j] = (float) j;
repeat(Xcoord, hs, 1, Xgrid);
repeat(Ycoord, 1, ws, Ygrid);
Xcoord.release();
Ycoord.release();
Mat templateZM = Mat(hs, ws, CV_32F);// to store the (smoothed)zero-mean version of template
Mat templateFloat = Mat(hs, ws, CV_32F);// to store the (smoothed) template
Mat imageFloat = Mat(hd, wd, CV_32F);// to store the (smoothed) input image
Mat imageWarped = Mat(hs, ws, CV_32F);// to store the warped zero-mean input image
Mat imageMask = Mat(hs, ws, CV_8U); // to store the final mask
Mat inputMaskMat = inputMask.getMat();
//to use it for mask warping
Mat preMask;
if(inputMask.empty())
preMask = Mat::ones(hd, wd, CV_8U);
else
threshold(inputMask, preMask, 0, 1, THRESH_BINARY);
//gaussian filtering is optional
src.convertTo(templateFloat, templateFloat.type());
GaussianBlur(templateFloat, templateFloat, Size(gaussFiltSize, gaussFiltSize), 0, 0);
Mat preMaskFloat;
preMask.convertTo(preMaskFloat, CV_32F);
GaussianBlur(preMaskFloat, preMaskFloat, Size(gaussFiltSize, gaussFiltSize), 0, 0);
// Change threshold.
preMaskFloat *= (0.5/0.95);
// Rounding conversion.
preMaskFloat.convertTo(preMask, preMask.type());
preMask.convertTo(preMaskFloat, preMaskFloat.type());
dst.convertTo(imageFloat, imageFloat.type());
GaussianBlur(imageFloat, imageFloat, Size(gaussFiltSize, gaussFiltSize), 0, 0);
// needed matrices for gradients and warped gradients
Mat gradientX = Mat::zeros(hd, wd, CV_32FC1);
Mat gradientY = Mat::zeros(hd, wd, CV_32FC1);
Mat gradientXWarped = Mat(hs, ws, CV_32FC1);
Mat gradientYWarped = Mat(hs, ws, CV_32FC1);
// calculate first order image derivatives
Matx13f dx(-0.5f, 0.0f, 0.5f);
filter2D(imageFloat, gradientX, -1, dx);
filter2D(imageFloat, gradientY, -1, dx.t());
gradientX = gradientX.mul(preMaskFloat);
gradientY = gradientY.mul(preMaskFloat);
// matrices needed for solving linear equation system for maximizing ECC
Mat jacobian = Mat(hs, ws*numberOfParameters, CV_32F);
Mat hessian = Mat(numberOfParameters, numberOfParameters, CV_32F);
Mat hessianInv = Mat(numberOfParameters, numberOfParameters, CV_32F);
Mat imageProjection = Mat(numberOfParameters, 1, CV_32F);
Mat templateProjection = Mat(numberOfParameters, 1, CV_32F);
Mat imageProjectionHessian = Mat(numberOfParameters, 1, CV_32F);
Mat errorProjection = Mat(numberOfParameters, 1, CV_32F);
Mat deltaP = Mat(numberOfParameters, 1, CV_32F);//transformation parameter correction
Mat error = Mat(hs, ws, CV_32F);//error as 2D matrix
const int imageFlags = INTER_LINEAR + WARP_INVERSE_MAP;
const int maskFlags = INTER_NEAREST + WARP_INVERSE_MAP;
// iteratively update map_matrix
double rho = -1;
double last_rho = - termination_eps;
for (int i = 1; (i <= numberOfIterations) && (fabs(rho-last_rho)>= termination_eps); i++)
{
// warp-back portion of the inputImage and gradients to the coordinate space of the templateImage
if (motionType != MOTION_HOMOGRAPHY)
{
warpAffine(imageFloat, imageWarped, map, imageWarped.size(), imageFlags);
warpAffine(gradientX, gradientXWarped, map, gradientXWarped.size(), imageFlags);
warpAffine(gradientY, gradientYWarped, map, gradientYWarped.size(), imageFlags);
warpAffine(preMask, imageMask, map, imageMask.size(), maskFlags);
}
else
{
warpPerspective(imageFloat, imageWarped, map, imageWarped.size(), imageFlags);
warpPerspective(gradientX, gradientXWarped, map, gradientXWarped.size(), imageFlags);
warpPerspective(gradientY, gradientYWarped, map, gradientYWarped.size(), imageFlags);
warpPerspective(preMask, imageMask, map, imageMask.size(), maskFlags);
}
Scalar imgMean, imgStd, tmpMean, tmpStd;
meanStdDev(imageWarped, imgMean, imgStd, imageMask);
meanStdDev(templateFloat, tmpMean, tmpStd, imageMask);
subtract(imageWarped, imgMean, imageWarped, imageMask);//zero-mean input
templateZM = Mat::zeros(templateZM.rows, templateZM.cols, templateZM.type());
subtract(templateFloat, tmpMean, templateZM, imageMask);//zero-mean template
const double tmpNorm = std::sqrt(countNonZero(imageMask)*(tmpStd.val[0])*(tmpStd.val[0]));
const double imgNorm = std::sqrt(countNonZero(imageMask)*(imgStd.val[0])*(imgStd.val[0]));
// calculate jacobian of image wrt parameters
switch (motionType){
case MOTION_AFFINE:
image_jacobian_affine_ECC(gradientXWarped, gradientYWarped, Xgrid, Ygrid, jacobian);
break;
case MOTION_HOMOGRAPHY:
image_jacobian_homo_ECC(gradientXWarped, gradientYWarped, Xgrid, Ygrid, map, jacobian);
break;
case MOTION_TRANSLATION:
image_jacobian_translation_ECC(gradientXWarped, gradientYWarped, jacobian);
break;
case MOTION_EUCLIDEAN:
image_jacobian_euclidean_ECC(gradientXWarped, gradientYWarped, Xgrid, Ygrid, map, jacobian);
break;
}
// calculate Hessian and its inverse
project_onto_jacobian_ECC(jacobian, jacobian, hessian);
hessianInv = hessian.inv();
const double correlation = templateZM.dot(imageWarped);
// calculate enhanced correlation coefficient (ECC)->rho
last_rho = rho;
rho = correlation/(imgNorm*tmpNorm);
if (cvIsNaN(rho)) {
CV_Error(Error::StsNoConv, "NaN encountered.");
}
// project images into jacobian
project_onto_jacobian_ECC( jacobian, imageWarped, imageProjection);
project_onto_jacobian_ECC(jacobian, templateZM, templateProjection);
// calculate the parameter lambda to account for illumination variation
imageProjectionHessian = hessianInv*imageProjection;
const double lambda_n = (imgNorm*imgNorm) - imageProjection.dot(imageProjectionHessian);
const double lambda_d = correlation - templateProjection.dot(imageProjectionHessian);
if (lambda_d <= 0.0)
{
rho = -1;
CV_Error(Error::StsNoConv, "The algorithm stopped before its convergence. The correlation is going to be minimized. Images may be uncorrelated or non-overlapped");
}
const double lambda = (lambda_n/lambda_d);
// estimate the update step delta_p
error = lambda*templateZM - imageWarped;
project_onto_jacobian_ECC(jacobian, error, errorProjection);
deltaP = hessianInv * errorProjection;
// update warping matrix
update_warping_matrix_ECC( map, deltaP, motionType);
}
// return final correlation coefficient
return rho;
}
double cv::findTransformECC(InputArray templateImage, InputArray inputImage,
InputOutputArray warpMatrix, int motionType,
TermCriteria criteria,
InputArray inputMask)
{
// Use default value of 5 for gaussFiltSize to maintain backward compatibility.
return findTransformECC(templateImage, inputImage, warpMatrix, motionType, criteria, inputMask, 5);
}
/* End of file. */

View File

@@ -0,0 +1,134 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
namespace cv
{
KalmanFilter::KalmanFilter() {}
KalmanFilter::KalmanFilter(int dynamParams, int measureParams, int controlParams, int type)
{
init(dynamParams, measureParams, controlParams, type);
}
void KalmanFilter::init(int DP, int MP, int CP, int type)
{
CV_Assert( DP > 0 && MP > 0 );
CV_Assert( type == CV_32F || type == CV_64F );
CP = std::max(CP, 0);
statePre = Mat::zeros(DP, 1, type);
statePost = Mat::zeros(DP, 1, type);
transitionMatrix = Mat::eye(DP, DP, type);
processNoiseCov = Mat::eye(DP, DP, type);
measurementMatrix = Mat::zeros(MP, DP, type);
measurementNoiseCov = Mat::eye(MP, MP, type);
errorCovPre = Mat::zeros(DP, DP, type);
errorCovPost = Mat::zeros(DP, DP, type);
gain = Mat::zeros(DP, MP, type);
if( CP > 0 )
controlMatrix = Mat::zeros(DP, CP, type);
else
controlMatrix.release();
temp1.create(DP, DP, type);
temp2.create(MP, DP, type);
temp3.create(MP, MP, type);
temp4.create(MP, DP, type);
temp5.create(MP, 1, type);
}
const Mat& KalmanFilter::predict(const Mat& control)
{
CV_INSTRUMENT_REGION();
// update the state: x'(k) = A*x(k)
statePre = transitionMatrix*statePost;
if( !control.empty() )
// x'(k) = x'(k) + B*u(k)
statePre += controlMatrix*control;
// update error covariance matrices: temp1 = A*P(k)
temp1 = transitionMatrix*errorCovPost;
// P'(k) = temp1*At + Q
gemm(temp1, transitionMatrix, 1, processNoiseCov, 1, errorCovPre, GEMM_2_T);
// handle the case when there will be measurement before the next predict.
statePre.copyTo(statePost);
errorCovPre.copyTo(errorCovPost);
return statePre;
}
const Mat& KalmanFilter::correct(const Mat& measurement)
{
CV_INSTRUMENT_REGION();
// temp2 = H*P'(k)
temp2 = measurementMatrix * errorCovPre;
// temp3 = temp2*Ht + R
gemm(temp2, measurementMatrix, 1, measurementNoiseCov, 1, temp3, GEMM_2_T);
// temp4 = inv(temp3)*temp2 = Kt(k)
solve(temp3, temp2, temp4, DECOMP_SVD);
// K(k)
gain = temp4.t();
// temp5 = z(k) - H*x'(k)
temp5 = measurement - measurementMatrix*statePre;
// x(k) = x'(k) + K(k)*temp5
statePost = statePre + gain*temp5;
// P(k) = P'(k) - K(k)*temp2
errorCovPost = errorCovPre - gain*temp2;
return statePost;
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,48 @@
#pragma once
namespace cv
{
namespace detail
{
typedef short deriv_type;
struct SharrDerivInvoker : ParallelLoopBody
{
SharrDerivInvoker(const Mat& _src, const Mat& _dst)
: src(_src), dst(_dst)
{ }
void operator()(const Range& range) const CV_OVERRIDE;
const Mat& src;
const Mat& dst;
};
struct LKTrackerInvoker : ParallelLoopBody
{
LKTrackerInvoker( const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
const Point2f* _prevPts, Point2f* _nextPts,
uchar* _status, float* _err,
Size _winSize, TermCriteria _criteria,
int _level, int _maxLevel, int _flags, float _minEigThreshold );
void operator()(const Range& range) const CV_OVERRIDE;
const Mat* prevImg;
const Mat* nextImg;
const Mat* prevDeriv;
const Point2f* prevPts;
Point2f* nextPts;
uchar* status;
float* err;
Size winSize;
TermCriteria criteria;
int level;
int maxLevel;
int flags;
float minEigThreshold;
};
}// namespace detail
}// namespace cv

View File

@@ -0,0 +1,248 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2018 Ya-Chiu Wu, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Ya-Chiu Wu, yacwu@cs.nctu.edu.tw
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if CN==1
#define T_MEAN float
#define F_ZERO (0.0f)
#define frameToMean(a, b) (b) = *(a);
#define meanToFrame(a, b) *b = convert_uchar_sat(a);
#else
#define T_MEAN float4
#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
#define meanToFrame(a, b)\
b[0] = convert_uchar_sat(a.x); \
b[1] = convert_uchar_sat(a.y); \
b[2] = convert_uchar_sat(a.z);
#define frameToMean(a, b)\
b.x = a[0]; \
b.y = a[1]; \
b.z = a[2]; \
b.w = 0.0f;
#endif
__kernel void knn_kernel(__global const uchar* frame, int frame_step, int frame_offset, int frame_row, int frame_col,
__global const uchar* nNextLongUpdate,
__global const uchar* nNextMidUpdate,
__global const uchar* nNextShortUpdate,
__global uchar* aModelIndexLong,
__global uchar* aModelIndexMid,
__global uchar* aModelIndexShort,
__global uchar* flag,
__global uchar* sample,
__global uchar* fgmask, int fgmask_step, int fgmask_offset,
int nLongCounter, int nMidCounter, int nShortCounter,
float c_Tb, int c_nkNN, float c_tau
#ifdef SHADOW_DETECT
, uchar c_shadowVal
#endif
)
{
int x = get_global_id(0);
int y = get_global_id(1);
if( x < frame_col && y < frame_row)
{
__global const uchar* _frame = (frame + mad24(y, frame_step, mad24(x, CN, frame_offset)));
T_MEAN pix;
frameToMean(_frame, pix);
uchar foreground = 255; // 0 - the pixel classified as background
int Pbf = 0;
int Pb = 0;
uchar include = 0;
int pt_idx = mad24(y, frame_col, x);
int idx_step = frame_row * frame_col;
__global T_MEAN* _sample = (__global T_MEAN*)(sample);
for (uchar n = 0; n < (NSAMPLES) * 3 ; ++n)
{
int n_idx = mad24(n, idx_step, pt_idx);
T_MEAN c_mean = _sample[n_idx];
uchar c_flag = flag[n_idx];
T_MEAN diff = c_mean - pix;
float dist2 = dot(diff, diff);
if (dist2 < c_Tb)
{
Pbf++;
if (c_flag)
{
Pb++;
if (Pb >= c_nkNN)
{
include = 1;
foreground = 0;
break;
}
}
}
}
if (Pbf >= c_nkNN)
{
include = 1;
}
#ifdef SHADOW_DETECT
if (foreground)
{
int Ps = 0;
for (uchar n = 0; n < (NSAMPLES) * 3 ; ++n)
{
int n_idx = mad24(n, idx_step, pt_idx);
uchar c_flag = flag[n_idx];
if (c_flag)
{
T_MEAN c_mean = _sample[n_idx];
float numerator = dot(pix, c_mean);
float denominator = dot(c_mean, c_mean);
if (denominator == 0)
break;
if (numerator <= denominator && numerator >= c_tau * denominator)
{
float a = numerator / denominator;
T_MEAN dD = mad(a, c_mean, -pix);
if (dot(dD, dD) < c_Tb * a * a)
{
Ps++;
if (Ps >= c_nkNN)
{
foreground = c_shadowVal;
break;
}
}
}
}
}
}
#endif
__global uchar* _fgmask = fgmask + mad24(y, fgmask_step, x + fgmask_offset);
*_fgmask = (uchar)foreground;
__global const uchar* _nNextLongUpdate = nNextLongUpdate + pt_idx;
__global const uchar* _nNextMidUpdate = nNextMidUpdate + pt_idx;
__global const uchar* _nNextShortUpdate = nNextShortUpdate + pt_idx;
__global uchar* _aModelIndexLong = aModelIndexLong + pt_idx;
__global uchar* _aModelIndexMid = aModelIndexMid + pt_idx;
__global uchar* _aModelIndexShort = aModelIndexShort + pt_idx;
uchar nextLongUpdate = _nNextLongUpdate[0];
uchar nextMidUpdate = _nNextMidUpdate[0];
uchar nextShortUpdate = _nNextShortUpdate[0];
uchar modelIndexLong = _aModelIndexLong[0];
uchar modelIndexMid = _aModelIndexMid[0];
uchar modelIndexShort = _aModelIndexShort[0];
int offsetLong = mad24(mad24(2, (NSAMPLES), modelIndexLong), idx_step, pt_idx);
int offsetMid = mad24((NSAMPLES)+modelIndexMid, idx_step, pt_idx);
int offsetShort = mad24(modelIndexShort, idx_step, pt_idx);
if (nextLongUpdate == nLongCounter)
{
_sample[offsetLong] = _sample[offsetMid];
flag[offsetLong] = flag[offsetMid];
_aModelIndexLong[0] = (modelIndexLong >= ((NSAMPLES)-1)) ? 0 : (modelIndexLong + 1);
}
if (nextMidUpdate == nMidCounter)
{
_sample[offsetMid] = _sample[offsetShort];
flag[offsetMid] = flag[offsetShort];
_aModelIndexMid[0] = (modelIndexMid >= ((NSAMPLES)-1)) ? 0 : (modelIndexMid + 1);
}
if (nextShortUpdate == nShortCounter)
{
_sample[offsetShort] = pix;
flag[offsetShort] = include;
_aModelIndexShort[0] = (modelIndexShort >= ((NSAMPLES)-1)) ? 0 : (modelIndexShort + 1);
}
}
}
__kernel void getBackgroundImage2_kernel(__global const uchar* flag,
__global const uchar* sample,
__global uchar* dst, int dst_step, int dst_offset, int dst_row, int dst_col)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < dst_col && y < dst_row)
{
int pt_idx = mad24(y, dst_col, x);
T_MEAN meanVal = (T_MEAN)F_ZERO;
__global T_MEAN* _sample = (__global T_MEAN*)(sample);
int idx_step = dst_row * dst_col;
for (uchar n = 0; n < (NSAMPLES) * 3 ; ++n)
{
int n_idx = mad24(n, idx_step, pt_idx);
uchar c_flag = flag[n_idx];
if(c_flag)
{
meanVal = _sample[n_idx];
break;
}
}
__global uchar* _dst = dst + mad24(y, dst_step, mad24(x, CN, dst_offset));
meanToFrame(meanVal, _dst);
}
}

View File

@@ -0,0 +1,282 @@
#if CN==1
#define T_MEAN float
#define F_ZERO (0.0f)
#define cnMode 1
#define frameToMean(a, b) (b) = *(a);
#if FL==0
#define meanToFrame(a, b) *b = convert_uchar_sat(a);
#else
#define meanToFrame(a, b) *b = (float)a;
#endif
#else
#define T_MEAN float4
#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
#define cnMode 4
#if FL == 0
#define meanToFrame(a, b)\
b[0] = convert_uchar_sat(a.x); \
b[1] = convert_uchar_sat(a.y); \
b[2] = convert_uchar_sat(a.z);
#else
#define meanToFrame(a, b)\
b[0] = a.x; \
b[1] = a.y; \
b[2] = a.z;
#endif
#define frameToMean(a, b)\
b.x = a[0]; \
b.y = a[1]; \
b.z = a[2]; \
b.w = 0.0f;
#endif
__kernel void mog2_kernel(__global const uchar* frame, int frame_step, int frame_offset, int frame_row, int frame_col, //uchar || uchar3
__global uchar* modesUsed, //uchar
__global uchar* weight, //float
__global uchar* mean, //T_MEAN=float || float4
__global uchar* variance, //float
__global uchar* fgmask, int fgmask_step, int fgmask_offset, //uchar
float alphaT, float alpha1, float prune,
float c_Tb, float c_TB, float c_Tg, float c_varMin, //constants
float c_varMax, float c_varInit, float c_tau
#ifdef SHADOW_DETECT
, uchar c_shadowVal
#endif
)
{
int x = get_global_id(0);
int y = get_global_id(1);
if( x < frame_col && y < frame_row)
{
#if FL==0
__global const uchar* _frame = (frame + mad24(y, frame_step, mad24(x, CN, frame_offset)));
#else
__global const float* _frame = ((__global const float*)( frame + mad24(y, frame_step, frame_offset)) + mad24(x, CN, 0));
#endif
T_MEAN pix;
frameToMean(_frame, pix);
uchar foreground = 255; // 0 - the pixel classified as background
bool fitsPDF = false; //if it remains zero a new GMM mode will be added
int pt_idx = mad24(y, frame_col, x);
int idx_step = frame_row * frame_col;
__global uchar* _modesUsed = modesUsed + pt_idx;
uchar nmodes = _modesUsed[0];
float totalWeight = 0.0f;
__global float* _weight = (__global float*)(weight);
__global float* _variance = (__global float*)(variance);
__global T_MEAN* _mean = (__global T_MEAN*)(mean);
uchar mode = 0;
for (; mode < nmodes; ++mode)
{
int mode_idx = mad24(mode, idx_step, pt_idx);
float c_weight = mad(alpha1, _weight[mode_idx], prune);
float c_var = _variance[mode_idx];
T_MEAN c_mean = _mean[mode_idx];
T_MEAN diff = c_mean - pix;
float dist2 = dot(diff, diff);
if (totalWeight < c_TB && dist2 < c_Tb * c_var)
foreground = 0;
if (dist2 < c_Tg * c_var)
{
fitsPDF = true;
c_weight += alphaT;
float k = alphaT / c_weight;
T_MEAN mean_new = mad((T_MEAN)-k, diff, c_mean);
float variance_new = clamp(mad(k, (dist2 - c_var), c_var), c_varMin, c_varMax);
for (int i = mode; i > 0; --i)
{
int prev_idx = mode_idx - idx_step;
if (c_weight < _weight[prev_idx])
break;
_weight[mode_idx] = _weight[prev_idx];
_variance[mode_idx] = _variance[prev_idx];
_mean[mode_idx] = _mean[prev_idx];
mode_idx = prev_idx;
}
_mean[mode_idx] = mean_new;
_variance[mode_idx] = variance_new;
_weight[mode_idx] = c_weight; //update weight by the calculated value
totalWeight += c_weight;
mode ++;
break;
}
if (c_weight < -prune)
c_weight = 0.0f;
_weight[mode_idx] = c_weight; //update weight by the calculated value
totalWeight += c_weight;
}
for (; mode < nmodes; ++mode)
{
int mode_idx = mad24(mode, idx_step, pt_idx);
float c_weight = mad(alpha1, _weight[mode_idx], prune);
if (c_weight < -prune)
{
c_weight = 0.0f;
nmodes = mode;
break;
}
_weight[mode_idx] = c_weight; //update weight by the calculated value
totalWeight += c_weight;
}
if (0.f < totalWeight)
{
totalWeight = 1.f / totalWeight;
for (int mode = 0; mode < nmodes; ++mode)
_weight[mad24(mode, idx_step, pt_idx)] *= totalWeight;
}
if (!fitsPDF)
{
uchar mode = nmodes == (NMIXTURES) ? (NMIXTURES) - 1 : nmodes++;
int mode_idx = mad24(mode, idx_step, pt_idx);
if (nmodes == 1)
_weight[mode_idx] = 1.f;
else
{
_weight[mode_idx] = alphaT;
for (int i = pt_idx; i < mode_idx; i += idx_step)
_weight[i] *= alpha1;
}
for (int i = nmodes - 1; i > 0; --i)
{
int prev_idx = mode_idx - idx_step;
if (alphaT < _weight[prev_idx])
break;
_weight[mode_idx] = _weight[prev_idx];
_variance[mode_idx] = _variance[prev_idx];
_mean[mode_idx] = _mean[prev_idx];
mode_idx = prev_idx;
}
_mean[mode_idx] = pix;
_variance[mode_idx] = c_varInit;
}
_modesUsed[0] = nmodes;
#ifdef SHADOW_DETECT
if (foreground)
{
float tWeight = 0.0f;
for (uchar mode = 0; mode < nmodes; ++mode)
{
int mode_idx = mad24(mode, idx_step, pt_idx);
T_MEAN c_mean = _mean[mode_idx];
float numerator = dot(pix, c_mean);
float denominator = dot(c_mean, c_mean);
if (denominator == 0)
break;
if (numerator <= denominator && numerator >= c_tau * denominator)
{
float a = numerator / denominator;
T_MEAN dD = mad(a, c_mean, -pix);
if (dot(dD, dD) < c_Tb * _variance[mode_idx] * a * a)
{
foreground = c_shadowVal;
break;
}
}
tWeight += _weight[mode_idx];
if (tWeight > c_TB)
break;
}
}
#endif
__global uchar* _fgmask = fgmask + mad24(y, fgmask_step, x + fgmask_offset);
*_fgmask = (uchar)foreground;
}
}
__kernel void getBackgroundImage2_kernel(__global const uchar* modesUsed,
__global const uchar* weight,
__global const uchar* mean,
__global uchar* dst, int dst_step, int dst_offset, int dst_row, int dst_col,
float c_TB)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < dst_col && y < dst_row)
{
int pt_idx = mad24(y, dst_col, x);
__global const uchar* _modesUsed = modesUsed + pt_idx;
uchar nmodes = _modesUsed[0];
T_MEAN meanVal = (T_MEAN)F_ZERO;
float totalWeight = 0.0f;
__global const float* _weight = (__global const float*)weight;
__global const T_MEAN* _mean = (__global const T_MEAN*)(mean);
int idx_step = dst_row * dst_col;
for (uchar mode = 0; mode < nmodes; ++mode)
{
int mode_idx = mad24(mode, idx_step, pt_idx);
float c_weight = _weight[mode_idx];
T_MEAN c_mean = _mean[mode_idx];
meanVal = mad(c_weight, c_mean, meanVal);
totalWeight += c_weight;
if (totalWeight > c_TB)
break;
}
if (0.f < totalWeight)
meanVal = meanVal / totalWeight;
else
meanVal = (T_MEAN)(0.f);
#if FL==0
__global uchar* _dst = dst + mad24(y, dst_step, mad24(x, CN, dst_offset));
meanToFrame(meanVal, _dst);
#else
__global float* _dst = ((__global float*)( dst + mad24(y, dst_step, dst_offset)) + mad24(x, CN, 0));
meanToFrame(meanVal, _dst);
#endif
}
}

View File

@@ -0,0 +1,540 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//#define CV_USE_SUBGROUPS
#define EPS 0.001f
#define INF 1E+10F
//#define DIS_BORDER_SIZE xxx
//#define DIS_PATCH_SIZE xxx
//#define DIS_PATCH_STRIDE xxx
#define DIS_PATCH_SIZE_HALF (DIS_PATCH_SIZE / 2)
#ifndef DIS_BORDER_SIZE
__kernel void dis_precomputeStructureTensor_hor(__global const short *I0x,
__global const short *I0y,
int w, int h, int ws,
__global float *I0xx_aux_ptr,
__global float *I0yy_aux_ptr,
__global float *I0xy_aux_ptr,
__global float *I0x_aux_ptr,
__global float *I0y_aux_ptr)
{
int i = get_global_id(0);
if (i >= h) return;
const __global short *x_row = I0x + i * w;
const __global short *y_row = I0y + i * w;
float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f, sum_x = 0.0f, sum_y = 0.0f;
float8 x_vec = convert_float8(vload8(0, x_row));
float8 y_vec = convert_float8(vload8(0, y_row));
sum_xx = dot(x_vec.lo, x_vec.lo) + dot(x_vec.hi, x_vec.hi);
sum_yy = dot(y_vec.lo, y_vec.lo) + dot(y_vec.hi, y_vec.hi);
sum_xy = dot(x_vec.lo, y_vec.lo) + dot(x_vec.hi, y_vec.hi);
sum_x = dot(x_vec.lo, 1.0f) + dot(x_vec.hi, 1.0f);
sum_y = dot(y_vec.lo, 1.0f) + dot(y_vec.hi, 1.0f);
I0xx_aux_ptr[i * ws] = sum_xx;
I0yy_aux_ptr[i * ws] = sum_yy;
I0xy_aux_ptr[i * ws] = sum_xy;
I0x_aux_ptr[i * ws] = sum_x;
I0y_aux_ptr[i * ws] = sum_y;
int js = 1;
for (int j = DIS_PATCH_SIZE; j < w; j++)
{
short x_val1 = x_row[j];
short x_val2 = x_row[j - DIS_PATCH_SIZE];
short y_val1 = y_row[j];
short y_val2 = y_row[j - DIS_PATCH_SIZE];
sum_xx += (x_val1 * x_val1 - x_val2 * x_val2);
sum_yy += (y_val1 * y_val1 - y_val2 * y_val2);
sum_xy += (x_val1 * y_val1 - x_val2 * y_val2);
sum_x += (x_val1 - x_val2);
sum_y += (y_val1 - y_val2);
if ((j - DIS_PATCH_SIZE + 1) % DIS_PATCH_STRIDE == 0)
{
int index = i * ws + js;
I0xx_aux_ptr[index] = sum_xx;
I0yy_aux_ptr[index] = sum_yy;
I0xy_aux_ptr[index] = sum_xy;
I0x_aux_ptr[index] = sum_x;
I0y_aux_ptr[index] = sum_y;
js++;
}
}
}
__kernel void dis_precomputeStructureTensor_ver(__global const float *I0xx_aux_ptr,
__global const float *I0yy_aux_ptr,
__global const float *I0xy_aux_ptr,
__global const float *I0x_aux_ptr,
__global const float *I0y_aux_ptr,
int w, int h, int ws,
__global float *I0xx_ptr,
__global float *I0yy_ptr,
__global float *I0xy_ptr,
__global float *I0x_ptr,
__global float *I0y_ptr)
{
int j = get_global_id(0);
if (j >= ws) return;
float sum_xx, sum_yy, sum_xy, sum_x, sum_y;
sum_xx = sum_yy = sum_xy = sum_x = sum_y = 0.0f;
for (int i = 0; i < DIS_PATCH_SIZE; i++)
{
sum_xx += I0xx_aux_ptr[i * ws + j];
sum_yy += I0yy_aux_ptr[i * ws + j];
sum_xy += I0xy_aux_ptr[i * ws + j];
sum_x += I0x_aux_ptr[i * ws + j];
sum_y += I0y_aux_ptr[i * ws + j];
}
I0xx_ptr[j] = sum_xx;
I0yy_ptr[j] = sum_yy;
I0xy_ptr[j] = sum_xy;
I0x_ptr[j] = sum_x;
I0y_ptr[j] = sum_y;
int is = 1;
for (int i = DIS_PATCH_SIZE; i < h; i++)
{
sum_xx += (I0xx_aux_ptr[i * ws + j] - I0xx_aux_ptr[(i - DIS_PATCH_SIZE) * ws + j]);
sum_yy += (I0yy_aux_ptr[i * ws + j] - I0yy_aux_ptr[(i - DIS_PATCH_SIZE) * ws + j]);
sum_xy += (I0xy_aux_ptr[i * ws + j] - I0xy_aux_ptr[(i - DIS_PATCH_SIZE) * ws + j]);
sum_x += (I0x_aux_ptr[i * ws + j] - I0x_aux_ptr[(i - DIS_PATCH_SIZE) * ws + j]);
sum_y += (I0y_aux_ptr[i * ws + j] - I0y_aux_ptr[(i - DIS_PATCH_SIZE) * ws + j]);
if ((i - DIS_PATCH_SIZE + 1) % DIS_PATCH_STRIDE == 0)
{
I0xx_ptr[is * ws + j] = sum_xx;
I0yy_ptr[is * ws + j] = sum_yy;
I0xy_ptr[is * ws + j] = sum_xy;
I0x_ptr[is * ws + j] = sum_x;
I0y_ptr[is * ws + j] = sum_y;
is++;
}
}
}
__kernel void dis_densification(__global const float2 *S_ptr,
__global const uchar *i0, __global const uchar *i1,
int w, int h, int ws,
__global float2 *U_ptr)
{
int x = get_global_id(0);
int y = get_global_id(1);
int i, j;
if (x >= w || y >= h) return;
int start_is, end_is;
int start_js, end_js;
end_is = min(y / DIS_PATCH_STRIDE, (h - DIS_PATCH_SIZE) / DIS_PATCH_STRIDE);
start_is = max(0, y - DIS_PATCH_SIZE + DIS_PATCH_STRIDE) / DIS_PATCH_STRIDE;
start_is = min(start_is, end_is);
end_js = min(x / DIS_PATCH_STRIDE, (w - DIS_PATCH_SIZE) / DIS_PATCH_STRIDE);
start_js = max(0, x - DIS_PATCH_SIZE + DIS_PATCH_STRIDE) / DIS_PATCH_STRIDE;
start_js = min(start_js, end_js);
float sum_coef = 0.0f;
float2 sum_U = (float2)(0.0f, 0.0f);
int i_l, i_u;
int j_l, j_u;
float i_m, j_m, diff;
i = y;
j = x;
/* Iterate through all the patches that overlap the current location (i,j) */
for (int is = start_is; is <= end_is; is++)
for (int js = start_js; js <= end_js; js++)
{
float2 s_val = S_ptr[is * ws + js];
uchar2 i1_vec1, i1_vec2;
j_m = min(max(j + s_val.x, 0.0f), w - 1.0f - EPS);
i_m = min(max(i + s_val.y, 0.0f), h - 1.0f - EPS);
j_l = (int)j_m;
j_u = j_l + 1;
i_l = (int)i_m;
i_u = i_l + 1;
i1_vec1 = vload2(0, i1 + i_u * w + j_l);
i1_vec2 = vload2(0, i1 + i_l * w + j_l);
diff = (j_m - j_l) * (i_m - i_l) * i1_vec1.y +
(j_u - j_m) * (i_m - i_l) * i1_vec1.x +
(j_m - j_l) * (i_u - i_m) * i1_vec2.y +
(j_u - j_m) * (i_u - i_m) * i1_vec2.x - i0[i * w + j];
float coef = 1.0f / max(1.0f, fabs(diff));
sum_U += coef * s_val;
sum_coef += coef;
}
float inv_sum_coef = 1.0 / sum_coef;
U_ptr[i * w + j] = sum_U * inv_sum_coef;
}
#else // DIS_BORDER_SIZE
#define INIT_BILINEAR_WEIGHTS(Ux, Uy) \
i_I1 = clamp(i + Uy + DIS_BORDER_SIZE, i_lower_limit, i_upper_limit); \
j_I1 = clamp(j + Ux + DIS_BORDER_SIZE, j_lower_limit, j_upper_limit); \
{ \
float di = i_I1 - floor(i_I1); \
float dj = j_I1 - floor(j_I1); \
w11 = di * dj; \
w10 = di * (1 - dj); \
w01 = (1 - di) * dj; \
w00 = (1 - di) * (1 - dj); \
}
float computeSSDMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr,
int I0_stride, int I1_stride,
float w00, float w01, float w10, float w11, int i
#ifndef CV_USE_SUBGROUPS
, __local float2 *smem /*[8]*/
#endif
)
{
float sum_diff = 0.0f, sum_diff_sq = 0.0f;
int n = DIS_PATCH_SIZE * DIS_PATCH_SIZE;
uchar8 I1_vec1, I1_vec2, I0_vec;
uchar I1_val1, I1_val2;
I0_vec = vload8(0, I0_ptr + i * I0_stride);
I1_vec1 = vload8(0, I1_ptr + i * I1_stride);
I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride);
I1_val1 = I1_ptr[i * I1_stride + 8];
I1_val2 = I1_ptr[(i + 1) * I1_stride + 8];
float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) +
w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) -
convert_float8(I0_vec);
sum_diff = (dot(vec.lo, 1.0) + dot(vec.hi, 1.0));
sum_diff_sq = (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi));
#ifdef CV_USE_SUBGROUPS
sum_diff = sub_group_reduce_add(sum_diff);
sum_diff_sq = sub_group_reduce_add(sum_diff_sq);
#else
barrier(CLK_LOCAL_MEM_FENCE);
smem[i] = (float2)(sum_diff, sum_diff_sq);
barrier(CLK_LOCAL_MEM_FENCE);
if (i < 4)
smem[i] += smem[i + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if (i < 2)
smem[i] += smem[i + 2];
barrier(CLK_LOCAL_MEM_FENCE);
if (i == 0)
smem[0] += smem[1];
barrier(CLK_LOCAL_MEM_FENCE);
float2 reduce_add_result = smem[0];
sum_diff = reduce_add_result.x;
sum_diff_sq = reduce_add_result.y;
#endif
return sum_diff_sq - sum_diff * sum_diff / n;
}
__attribute__((reqd_work_group_size(8, 1, 1)))
__kernel void dis_patch_inverse_search_fwd_1(__global const float2 *U_ptr,
__global const uchar *I0_ptr, __global const uchar *I1_ptr,
int w, int h, int ws, int hs,
__global float2 *S_ptr)
{
int id = get_global_id(0);
int is = get_group_id(0);
int i = is * DIS_PATCH_STRIDE;
int j = 0;
int w_ext = w + 2 * DIS_BORDER_SIZE;
float i_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
float i_upper_limit = DIS_BORDER_SIZE + h - 1.0f;
float j_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
float j_upper_limit = DIS_BORDER_SIZE + w - 1.0f;
float2 prev_U = U_ptr[(i + DIS_PATCH_SIZE_HALF) * w + j + DIS_PATCH_SIZE_HALF];
S_ptr[is * ws] = prev_U;
j += DIS_PATCH_STRIDE;
#ifdef CV_USE_SUBGROUPS
int sid = get_sub_group_local_id();
#define EXTRA_ARGS_computeSSDMeanNorm sid
#else
__local float2 smem[8];
int sid = get_local_id(0);
#define EXTRA_ARGS_computeSSDMeanNorm sid, smem
#endif
for (int js = 1; js < ws; js++, j += DIS_PATCH_STRIDE)
{
float2 U = U_ptr[(i + DIS_PATCH_SIZE_HALF) * w + j + DIS_PATCH_SIZE_HALF];
float i_I1, j_I1, w00, w01, w10, w11;
INIT_BILINEAR_WEIGHTS(U.x, U.y);
float min_SSD = computeSSDMeanNorm(
I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
w, w_ext, w00, w01, w10, w11, EXTRA_ARGS_computeSSDMeanNorm);
INIT_BILINEAR_WEIGHTS(prev_U.x, prev_U.y);
float cur_SSD = computeSSDMeanNorm(
I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
w, w_ext, w00, w01, w10, w11, EXTRA_ARGS_computeSSDMeanNorm);
prev_U = (cur_SSD < min_SSD) ? prev_U : U;
S_ptr[is * ws + js] = prev_U;
}
#undef EXTRA_ARGS_computeSSDMeanNorm
}
#endif // DIS_BORDER_SIZE
float4 processPatchMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr,
const __global short *I0x_ptr, const __global short *I0y_ptr,
int I0_stride, int I1_stride, float w00, float w01, float w10,
float w11, float x_grad_sum, float y_grad_sum)
{
const float inv_n = 1.0f / (float)(DIS_PATCH_SIZE * DIS_PATCH_SIZE);
float sum_diff = 0.0, sum_diff_sq = 0.0;
float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0;
uchar8 I1_vec1;
uchar8 I1_vec2 = vload8(0, I1_ptr);
uchar I1_val1;
uchar I1_val2 = I1_ptr[DIS_PATCH_SIZE];
for (int i = 0; i < 8; i++)
{
uchar8 I0_vec = vload8(0, I0_ptr + i * I0_stride);
I1_vec1 = I1_vec2;
I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride);
I1_val1 = I1_val2;
I1_val2 = I1_ptr[(i + 1) * I1_stride + DIS_PATCH_SIZE];
float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) +
w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) -
convert_float8(I0_vec);
sum_diff += (dot(vec.lo, 1.0) + dot(vec.hi, 1.0));
sum_diff_sq += (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi));
short8 I0x_vec = vload8(0, I0x_ptr + i * I0_stride);
short8 I0y_vec = vload8(0, I0y_ptr + i * I0_stride);
sum_I0x_mul += dot(vec.lo, convert_float4(I0x_vec.lo));
sum_I0x_mul += dot(vec.hi, convert_float4(I0x_vec.hi));
sum_I0y_mul += dot(vec.lo, convert_float4(I0y_vec.lo));
sum_I0y_mul += dot(vec.hi, convert_float4(I0y_vec.hi));
}
float dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum * inv_n;
float dst_dUy = sum_I0y_mul - sum_diff * y_grad_sum * inv_n;
float SSD = sum_diff_sq - sum_diff * sum_diff * inv_n;
return (float4)(SSD, dst_dUx, dst_dUy, 0);
}
#ifdef DIS_BORDER_SIZE
__kernel void dis_patch_inverse_search_fwd_2(__global const float2 *U_ptr,
__global const uchar *I0_ptr, __global const uchar *I1_ptr,
__global const short *I0x_ptr, __global const short *I0y_ptr,
__global const float *xx_ptr, __global const float *yy_ptr,
__global const float *xy_ptr,
__global const float *x_ptr, __global const float *y_ptr,
int w, int h, int ws, int hs, int num_inner_iter,
__global float2 *S_ptr)
{
int js = get_global_id(0);
int is = get_global_id(1);
int i = is * DIS_PATCH_STRIDE;
int j = js * DIS_PATCH_STRIDE;
const int psz = DIS_PATCH_SIZE;
int w_ext = w + 2 * DIS_BORDER_SIZE;
int index = is * ws + js;
if (js >= ws || is >= hs) return;
float2 U0 = S_ptr[index];
float2 cur_U = U0;
float cur_xx = xx_ptr[index];
float cur_yy = yy_ptr[index];
float cur_xy = xy_ptr[index];
float detH = cur_xx * cur_yy - cur_xy * cur_xy;
float inv_detH = (fabs(detH) < EPS) ? 1.0 / EPS : 1.0 / detH;
float invH11 = cur_yy * inv_detH;
float invH12 = -cur_xy * inv_detH;
float invH22 = cur_xx * inv_detH;
float prev_SSD = INF;
float x_grad_sum = x_ptr[index];
float y_grad_sum = y_ptr[index];
const float i_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
const float i_upper_limit = DIS_BORDER_SIZE + h - 1.0f;
const float j_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
const float j_upper_limit = DIS_BORDER_SIZE + w - 1.0f;
for (int t = 0; t < num_inner_iter; t++)
{
float i_I1, j_I1, w00, w01, w10, w11;
INIT_BILINEAR_WEIGHTS(cur_U.x, cur_U.y);
float4 res = processPatchMeanNorm(
I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
I0x_ptr + i * w + j, I0y_ptr + i * w + j,
w, w_ext, w00, w01, w10, w11,
x_grad_sum, y_grad_sum);
float SSD = res.x;
float dUx = res.y;
float dUy = res.z;
float dx = invH11 * dUx + invH12 * dUy;
float dy = invH12 * dUx + invH22 * dUy;
cur_U -= (float2)(dx, dy);
if (SSD >= prev_SSD)
break;
prev_SSD = SSD;
}
float2 vec = cur_U - U0;
S_ptr[index] = (dot(vec, vec) <= (float)(DIS_PATCH_SIZE * DIS_PATCH_SIZE)) ? cur_U : U0;
}
__attribute__((reqd_work_group_size(8, 1, 1)))
__kernel void dis_patch_inverse_search_bwd_1(__global const uchar *I0_ptr, __global const uchar *I1_ptr,
int w, int h, int ws, int hs,
__global float2 *S_ptr)
{
int id = get_global_id(0);
int is = get_group_id(0);
is = (hs - 1 - is);
int i = is * DIS_PATCH_STRIDE;
int j = (ws - 2) * DIS_PATCH_STRIDE;
const int w_ext = w + 2 * DIS_BORDER_SIZE;
const float i_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
const float i_upper_limit = DIS_BORDER_SIZE + h - 1.0f;
const float j_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
const float j_upper_limit = DIS_BORDER_SIZE + w - 1.0f;
#ifdef CV_USE_SUBGROUPS
int sid = get_sub_group_local_id();
#define EXTRA_ARGS_computeSSDMeanNorm sid
#else
__local float2 smem[8];
int sid = get_local_id(0);
#define EXTRA_ARGS_computeSSDMeanNorm sid, smem
#endif
for (int js = (ws - 2); js > -1; js--, j -= DIS_PATCH_STRIDE)
{
float2 U0 = S_ptr[is * ws + js];
float2 U1 = S_ptr[is * ws + js + 1];
float i_I1, j_I1, w00, w01, w10, w11;
INIT_BILINEAR_WEIGHTS(U0.x, U0.y);
float min_SSD = computeSSDMeanNorm(
I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
w, w_ext, w00, w01, w10, w11, EXTRA_ARGS_computeSSDMeanNorm);
INIT_BILINEAR_WEIGHTS(U1.x, U1.y);
float cur_SSD = computeSSDMeanNorm(
I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
w, w_ext, w00, w01, w10, w11, EXTRA_ARGS_computeSSDMeanNorm);
S_ptr[is * ws + js] = (cur_SSD < min_SSD) ? U1 : U0;
}
#undef EXTRA_ARGS_computeSSDMeanNorm
}
__kernel void dis_patch_inverse_search_bwd_2(__global const uchar *I0_ptr, __global const uchar *I1_ptr,
__global const short *I0x_ptr, __global const short *I0y_ptr,
__global const float *xx_ptr, __global const float *yy_ptr,
__global const float *xy_ptr,
__global const float *x_ptr, __global const float *y_ptr,
int w, int h, int ws, int hs, int num_inner_iter,
__global float2 *S_ptr)
{
int js = get_global_id(0);
int is = get_global_id(1);
if (js >= ws || is >= hs) return;
js = (ws - 1 - js);
is = (hs - 1 - is);
int j = js * DIS_PATCH_STRIDE;
int i = is * DIS_PATCH_STRIDE;
int w_ext = w + 2 * DIS_BORDER_SIZE;
int index = is * ws + js;
float2 U0 = S_ptr[index];
float2 cur_U = U0;
float cur_xx = xx_ptr[index];
float cur_yy = yy_ptr[index];
float cur_xy = xy_ptr[index];
float detH = cur_xx * cur_yy - cur_xy * cur_xy;
float inv_detH = (fabs(detH) < EPS) ? 1.0 / EPS : 1.0 / detH;
float invH11 = cur_yy * inv_detH;
float invH12 = -cur_xy * inv_detH;
float invH22 = cur_xx * inv_detH;
float prev_SSD = INF;
float x_grad_sum = x_ptr[index];
float y_grad_sum = y_ptr[index];
const float i_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
const float i_upper_limit = DIS_BORDER_SIZE + h - 1.0f;
const float j_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
const float j_upper_limit = DIS_BORDER_SIZE + w - 1.0f;
for (int t = 0; t < num_inner_iter; t++)
{
float i_I1, j_I1, w00, w01, w10, w11;
INIT_BILINEAR_WEIGHTS(cur_U.x, cur_U.y);
float4 res = processPatchMeanNorm(
I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
I0x_ptr + i * w + j, I0y_ptr + i * w + j,
w, w_ext, w00, w01, w10, w11,
x_grad_sum, y_grad_sum);
float SSD = res.x;
float dUx = res.y;
float dUy = res.z;
float dx = invH11 * dUx + invH12 * dUy;
float dy = invH12 * dUx + invH22 * dUy;
cur_U -= (float2)(dx, dy);
if (SSD >= prev_SSD)
break;
prev_SSD = SSD;
}
float2 vec = cur_U - U0;
S_ptr[index] = ((dot(vec, vec)) <= (float)(DIS_PATCH_SIZE * DIS_PATCH_SIZE)) ? cur_U : U0;
}
#endif // DIS_BORDER_SIZE

View File

@@ -0,0 +1,429 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Sen Liu, swjtuls1987@126.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#define tx (int)get_local_id(0)
#define ty get_local_id(1)
#define bx get_group_id(0)
#define bdx (int)get_local_size(0)
#define BORDER_SIZE 5
#define MAX_KSIZE_HALF 100
#ifndef polyN
#define polyN 5
#endif
#if USE_DOUBLE
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define TYPE double
#define VECTYPE double4
#else
#define TYPE float
#define VECTYPE float4
#endif
__kernel void polynomialExpansion(__global __const float * src, int srcStep,
__global float * dst, int dstStep,
const int rows, const int cols,
__global __const float * c_g,
__global __const float * c_xg,
__global __const float * c_xxg,
__local float * smem,
const VECTYPE ig)
{
const int y = get_global_id(1);
const int x = bx * (bdx - 2*polyN) + tx - polyN;
int xWarped;
__local float *row = smem + tx;
if (y < rows && y >= 0)
{
xWarped = min(max(x, 0), cols - 1);
row[0] = src[mad24(y, srcStep, xWarped)] * c_g[0];
row[bdx] = 0.f;
row[2*bdx] = 0.f;
#pragma unroll
for (int k = 1; k <= polyN; ++k)
{
float t0 = src[mad24(max(y - k, 0), srcStep, xWarped)];
float t1 = src[mad24(min(y + k, rows - 1), srcStep, xWarped)];
row[0] += c_g[k] * (t0 + t1);
row[bdx] += c_xg[k] * (t1 - t0);
row[2*bdx] += c_xxg[k] * (t0 + t1);
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (y < rows && y >= 0 && tx >= polyN && tx + polyN < bdx && x < cols)
{
TYPE b1 = c_g[0] * row[0];
TYPE b3 = c_g[0] * row[bdx];
TYPE b5 = c_g[0] * row[2*bdx];
TYPE b2 = 0, b4 = 0, b6 = 0;
#pragma unroll
for (int k = 1; k <= polyN; ++k)
{
b1 += (row[k] + row[-k]) * c_g[k];
b4 += (row[k] + row[-k]) * c_xxg[k];
b2 += (row[k] - row[-k]) * c_xg[k];
b3 += (row[k + bdx] + row[-k + bdx]) * c_g[k];
b6 += (row[k + bdx] - row[-k + bdx]) * c_xg[k];
b5 += (row[k + 2*bdx] + row[-k + 2*bdx]) * c_g[k];
}
dst[mad24(y, dstStep, xWarped)] = (float)(b3*ig.s0);
dst[mad24(rows + y, dstStep, xWarped)] = (float)(b2*ig.s0);
dst[mad24(2*rows + y, dstStep, xWarped)] = (float)(b1*ig.s1 + b5*ig.s2);
dst[mad24(3*rows + y, dstStep, xWarped)] = (float)(b1*ig.s1 + b4*ig.s2);
dst[mad24(4*rows + y, dstStep, xWarped)] = (float)(b6*ig.s3);
}
}
inline int idx_row_low(const int y, const int last_row)
{
return abs(y) % (last_row + 1);
}
inline int idx_row_high(const int y, const int last_row)
{
return abs(last_row - abs(last_row - y)) % (last_row + 1);
}
inline int idx_col_low(const int x, const int last_col)
{
return abs(x) % (last_col + 1);
}
inline int idx_col_high(const int x, const int last_col)
{
return abs(last_col - abs(last_col - x)) % (last_col + 1);
}
inline int idx_col(const int x, const int last_col)
{
return idx_col_low(idx_col_high(x, last_col), last_col);
}
__kernel void gaussianBlur(__global const float * src, int srcStep,
__global float * dst, int dstStep, const int rows, const int cols,
__global const float * c_gKer, const int ksizeHalf,
__local float * smem)
{
const int y = get_global_id(1);
const int x = get_global_id(0);
__local float *row = smem + ty * (bdx + 2*ksizeHalf);
if (y < rows)
{
// Vertical pass
for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
{
int xExt = (int)(bx * bdx) + i - ksizeHalf;
xExt = idx_col(xExt, cols - 1);
row[i] = src[mad24(y, srcStep, xExt)] * c_gKer[0];
for (int j = 1; j <= ksizeHalf; ++j)
row[i] += (src[mad24(idx_row_low(y - j, rows - 1), srcStep, xExt)]
+ src[mad24(idx_row_high(y + j, rows - 1), srcStep, xExt)]) * c_gKer[j];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (y < rows && y >= 0 && x < cols && x >= 0)
{
// Horizontal pass
row += tx + ksizeHalf;
float res = row[0] * c_gKer[0];
for (int i = 1; i <= ksizeHalf; ++i)
res += (row[-i] + row[i]) * c_gKer[i];
dst[mad24(y, dstStep, x)] = res;
}
}
__kernel void gaussianBlur5(__global const float * src, int srcStep,
__global float * dst, int dstStep,
const int rows, const int cols,
__global const float * c_gKer, const int ksizeHalf,
__local float * smem)
{
const int y = get_global_id(1);
const int x = get_global_id(0);
const int smw = bdx + 2*ksizeHalf; // shared memory "cols"
__local volatile float *row = smem + 5 * ty * smw;
if (y < rows)
{
// Vertical pass
for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
{
int xExt = (int)(bx * bdx) + i - ksizeHalf;
xExt = idx_col(xExt, cols - 1);
#pragma unroll
for (int k = 0; k < 5; ++k)
row[k*smw + i] = src[mad24(k*rows + y, srcStep, xExt)] * c_gKer[0];
for (int j = 1; j <= ksizeHalf; ++j)
#pragma unroll
for (int k = 0; k < 5; ++k)
row[k*smw + i] +=
(src[mad24(k*rows + idx_row_low(y - j, rows - 1), srcStep, xExt)] +
src[mad24(k*rows + idx_row_high(y + j, rows - 1), srcStep, xExt)]) * c_gKer[j];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (y < rows && y >= 0 && x < cols && x >= 0)
{
// Horizontal pass
row += tx + ksizeHalf;
float res[5];
#pragma unroll
for (int k = 0; k < 5; ++k)
res[k] = row[k*smw] * c_gKer[0];
for (int i = 1; i <= ksizeHalf; ++i)
#pragma unroll
for (int k = 0; k < 5; ++k)
res[k] += (row[k*smw - i] + row[k*smw + i]) * c_gKer[i];
#pragma unroll
for (int k = 0; k < 5; ++k)
dst[mad24(k*rows + y, dstStep, x)] = res[k];
}
}
__constant float c_border[BORDER_SIZE + 1] = { 0.14f, 0.14f, 0.4472f, 0.4472f, 0.4472f, 1.f };
__kernel void updateMatrices(__global const float * flowx, int xStep,
__global const float * flowy, int yStep,
const int rows, const int cols,
__global const float * R0, int R0Step,
__global const float * R1, int R1Step,
__global float * M, int mStep)
{
const int y = get_global_id(1);
const int x = get_global_id(0);
if (y < rows && y >= 0 && x < cols && x >= 0)
{
float dx = flowx[mad24(y, xStep, x)];
float dy = flowy[mad24(y, yStep, x)];
float fx = x + dx;
float fy = y + dy;
int x1 = convert_int(floor(fx));
int y1 = convert_int(floor(fy));
fx -= x1;
fy -= y1;
float r2, r3, r4, r5, r6;
if (x1 >= 0 && y1 >= 0 && x1 < cols - 1 && y1 < rows - 1)
{
float a00 = (1.f - fx) * (1.f - fy);
float a01 = fx * (1.f - fy);
float a10 = (1.f - fx) * fy;
float a11 = fx * fy;
r2 = a00 * R1[mad24(y1, R1Step, x1)] +
a01 * R1[mad24(y1, R1Step, x1 + 1)] +
a10 * R1[mad24(y1 + 1, R1Step, x1)] +
a11 * R1[mad24(y1 + 1, R1Step, x1 + 1)];
r3 = a00 * R1[mad24(rows + y1, R1Step, x1)] +
a01 * R1[mad24(rows + y1, R1Step, x1 + 1)] +
a10 * R1[mad24(rows + y1 + 1, R1Step, x1)] +
a11 * R1[mad24(rows + y1 + 1, R1Step, x1 + 1)];
r4 = a00 * R1[mad24(2*rows + y1, R1Step, x1)] +
a01 * R1[mad24(2*rows + y1, R1Step, x1 + 1)] +
a10 * R1[mad24(2*rows + y1 + 1, R1Step, x1)] +
a11 * R1[mad24(2*rows + y1 + 1, R1Step, x1 + 1)];
r5 = a00 * R1[mad24(3*rows + y1, R1Step, x1)] +
a01 * R1[mad24(3*rows + y1, R1Step, x1 + 1)] +
a10 * R1[mad24(3*rows + y1 + 1, R1Step, x1)] +
a11 * R1[mad24(3*rows + y1 + 1, R1Step, x1 + 1)];
r6 = a00 * R1[mad24(4*rows + y1, R1Step, x1)] +
a01 * R1[mad24(4*rows + y1, R1Step, x1 + 1)] +
a10 * R1[mad24(4*rows + y1 + 1, R1Step, x1)] +
a11 * R1[mad24(4*rows + y1 + 1, R1Step, x1 + 1)];
r4 = (R0[mad24(2*rows + y, R0Step, x)] + r4) * 0.5f;
r5 = (R0[mad24(3*rows + y, R0Step, x)] + r5) * 0.5f;
r6 = (R0[mad24(4*rows + y, R0Step, x)] + r6) * 0.25f;
}
else
{
r2 = r3 = 0.f;
r4 = R0[mad24(2*rows + y, R0Step, x)];
r5 = R0[mad24(3*rows + y, R0Step, x)];
r6 = R0[mad24(4*rows + y, R0Step, x)] * 0.5f;
}
r2 = (R0[mad24(y, R0Step, x)] - r2) * 0.5f;
r3 = (R0[mad24(rows + y, R0Step, x)] - r3) * 0.5f;
r2 += r4*dy + r6*dx;
r3 += r6*dy + r5*dx;
float scale =
c_border[min(x, BORDER_SIZE)] *
c_border[min(y, BORDER_SIZE)] *
c_border[min(cols - x - 1, BORDER_SIZE)] *
c_border[min(rows - y - 1, BORDER_SIZE)];
r2 *= scale;
r3 *= scale;
r4 *= scale;
r5 *= scale;
r6 *= scale;
M[mad24(y, mStep, x)] = r4*r4 + r6*r6;
M[mad24(rows + y, mStep, x)] = (r4 + r5)*r6;
M[mad24(2*rows + y, mStep, x)] = r5*r5 + r6*r6;
M[mad24(3*rows + y, mStep, x)] = r4*r2 + r6*r3;
M[mad24(4*rows + y, mStep, x)] = r6*r2 + r5*r3;
}
}
__kernel void boxFilter5(__global const float * src, int srcStep,
__global float * dst, int dstStep,
const int rows, const int cols,
const int ksizeHalf,
__local float * smem)
{
const int y = get_global_id(1);
const int x = get_global_id(0);
const float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
const int smw = bdx + 2*ksizeHalf; // shared memory "width"
__local float *row = smem + 5 * ty * smw;
if (y < rows)
{
// Vertical pass
for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
{
int xExt = (int)(bx * bdx) + i - ksizeHalf;
xExt = min(max(xExt, 0), cols - 1);
#pragma unroll
for (int k = 0; k < 5; ++k)
row[k*smw + i] = src[mad24(k*rows + y, srcStep, xExt)];
for (int j = 1; j <= ksizeHalf; ++j)
#pragma unroll
for (int k = 0; k < 5; ++k)
row[k*smw + i] +=
src[mad24(k*rows + max(y - j, 0), srcStep, xExt)] +
src[mad24(k*rows + min(y + j, rows - 1), srcStep, xExt)];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (y < rows && y >= 0 && x < cols && x >= 0)
{
// Horizontal pass
row += tx + ksizeHalf;
float res[5];
#pragma unroll
for (int k = 0; k < 5; ++k)
res[k] = row[k*smw];
for (int i = 1; i <= ksizeHalf; ++i)
#pragma unroll
for (int k = 0; k < 5; ++k)
res[k] += row[k*smw - i] + row[k*smw + i];
#pragma unroll
for (int k = 0; k < 5; ++k)
dst[mad24(k*rows + y, dstStep, x)] = res[k] * boxAreaInv;
}
}
__kernel void updateFlow(__global const float * M, int mStep,
__global float * flowx, int xStep,
__global float * flowy, int yStep,
const int rows, const int cols)
{
const int y = get_global_id(1);
const int x = get_global_id(0);
if (y < rows && y >= 0 && x < cols && x >= 0)
{
float g11 = M[mad24(y, mStep, x)];
float g12 = M[mad24(rows + y, mStep, x)];
float g22 = M[mad24(2*rows + y, mStep, x)];
float h1 = M[mad24(3*rows + y, mStep, x)];
float h2 = M[mad24(4*rows + y, mStep, x)];
float detInv = 1.f / (g11*g22 - g12*g12 + 1e-3f);
flowx[mad24(y, xStep, x)] = (g11*h2 - g12*h1) * detInv;
flowy[mad24(y, yStep, x)] = (g22*h1 - g12*h2) * detInv;
}
}

View File

@@ -0,0 +1,558 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com
// Yao Wang, bitwangyaoyao@gmail.com
// Xiaopeng Fu, fuxiaopeng2222@163.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#define GRIDSIZE 3
#define LSx 8
#define LSy 8
// define local memory sizes
#define LM_W (LSx*GRIDSIZE+2)
#define LM_H (LSy*GRIDSIZE+2)
#define BUFFER (LSx*LSy)
#define BUFFER2 BUFFER>>1
#ifdef CPU
inline void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)
{
smem1[tid] = val1;
smem2[tid] = val2;
smem3[tid] = val3;
barrier(CLK_LOCAL_MEM_FENCE);
for(int i = BUFFER2; i > 0; i >>= 1)
{
if(tid < i)
{
smem1[tid] += smem1[tid + i];
smem2[tid] += smem2[tid + i];
smem3[tid] += smem3[tid + i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
inline void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid)
{
smem1[tid] = val1;
smem2[tid] = val2;
barrier(CLK_LOCAL_MEM_FENCE);
for(int i = BUFFER2; i > 0; i >>= 1)
{
if(tid < i)
{
smem1[tid] += smem1[tid + i];
smem2[tid] += smem2[tid + i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
inline void reduce1(float val1, __local float* smem1, int tid)
{
smem1[tid] = val1;
barrier(CLK_LOCAL_MEM_FENCE);
for(int i = BUFFER2; i > 0; i >>= 1)
{
if(tid < i)
{
smem1[tid] += smem1[tid + i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
#else
inline void reduce3(float val1, float val2, float val3,
__local float* smem1, __local float* smem2, __local float* smem3, int tid)
{
smem1[tid] = val1;
smem2[tid] = val2;
smem3[tid] = val3;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)
{
smem1[tid] += smem1[tid + 32];
smem2[tid] += smem2[tid + 32];
smem3[tid] += smem3[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
smem1[tid] += smem1[tid + 16];
smem2[tid] += smem2[tid + 16];
smem3[tid] += smem3[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
smem1[tid] += smem1[tid + 8];
smem2[tid] += smem2[tid + 8];
smem3[tid] += smem3[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
{
smem1[tid] += smem1[tid + 4];
smem2[tid] += smem2[tid + 4];
smem3[tid] += smem3[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid == 0)
{
smem1[0] = (smem1[0] + smem1[1]) + (smem1[2] + smem1[3]);
smem2[0] = (smem2[0] + smem2[1]) + (smem2[2] + smem2[3]);
smem3[0] = (smem3[0] + smem3[1]) + (smem3[2] + smem3[3]);
}
barrier(CLK_LOCAL_MEM_FENCE);
}
inline void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid)
{
smem1[tid] = val1;
smem2[tid] = val2;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)
{
smem1[tid] += smem1[tid + 32];
smem2[tid] += smem2[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
smem1[tid] += smem1[tid + 16];
smem2[tid] += smem2[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
smem1[tid] += smem1[tid + 8];
smem2[tid] += smem2[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
{
smem1[tid] += smem1[tid + 4];
smem2[tid] += smem2[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid == 0)
{
smem1[0] = (smem1[0] + smem1[1]) + (smem1[2] + smem1[3]);
smem2[0] = (smem2[0] + smem2[1]) + (smem2[2] + smem2[3]);
}
barrier(CLK_LOCAL_MEM_FENCE);
}
inline void reduce1(float val1, __local float* smem1, int tid)
{
smem1[tid] = val1;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)
{
smem1[tid] += smem1[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
smem1[tid] += smem1[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
smem1[tid] += smem1[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
{
smem1[tid] += smem1[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid == 0)
{
smem1[0] = (smem1[0] + smem1[1]) + (smem1[2] + smem1[3]);
}
barrier(CLK_LOCAL_MEM_FENCE);
}
#endif
#define SCALE (1.0f / (1 << 20))
#define THRESHOLD 0.01f
// Image read mode
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
// macro to get pixel value from local memory
#define VAL(_y,_x,_yy,_xx) (IPatchLocal[mad24(((_y) + (_yy)), LM_W, ((_x) + (_xx)))])
inline void SetPatch(local float* IPatchLocal, int TileY, int TileX,
float* Pch, float* Dx, float* Dy,
float* A11, float* A12, float* A22, float w)
{
int xid=get_local_id(0);
int yid=get_local_id(1);
int xBase = mad24(TileX, LSx, (xid + 1));
int yBase = mad24(TileY, LSy, (yid + 1));
*Pch = VAL(yBase,xBase,0,0);
*Dx = mad((VAL(yBase,xBase,-1,1) + VAL(yBase,xBase,+1,1) - VAL(yBase,xBase,-1,-1) - VAL(yBase,xBase,+1,-1)), 3.0f, (VAL(yBase,xBase,0,1) - VAL(yBase,xBase,0,-1)) * 10.0f) * w;
*Dy = mad((VAL(yBase,xBase,1,-1) + VAL(yBase,xBase,1,+1) - VAL(yBase,xBase,-1,-1) - VAL(yBase,xBase,-1,+1)), 3.0f, (VAL(yBase,xBase,1,0) - VAL(yBase,xBase,-1,0)) * 10.0f) * w;
*A11 = mad(*Dx, *Dx, *A11);
*A12 = mad(*Dx, *Dy, *A12);
*A22 = mad(*Dy, *Dy, *A22);
}
#undef VAL
inline void GetPatch(image2d_t J, float x, float y,
float* Pch, float* Dx, float* Dy,
float* b1, float* b2)
{
float diff = read_imagef(J, sampler, (float2)(x,y)).x-*Pch;
*b1 = mad(diff, *Dx, *b1);
*b2 = mad(diff, *Dy, *b2);
}
inline void GetError(image2d_t J, const float x, const float y, const float* Pch, float* errval, float w)
{
float diff = ((((read_imagef(J, sampler, (float2)(x,y)).x * 16384) + 256) / 512) - (((*Pch * 16384) + 256) /512)) * w;
*errval += fabs(diff);
}
//macro to read pixel value into local memory.
#define READI(_y,_x) IPatchLocal[mad24(mad24((_y), LSy, yid), LM_W, mad24((_x), LSx, xid))] = read_imagef(I, sampler, (float2)(mad((float)(_x), (float)LSx, Point.x + xid - 0.5f), mad((float)(_y), (float)LSy, Point.y + yid - 0.5f))).x;
void ReadPatchIToLocalMem(image2d_t I, float2 Point, local float* IPatchLocal)
{
int xid=get_local_id(0);
int yid=get_local_id(1);
//read (3*LSx)*(3*LSy) window. each macro call read LSx*LSy pixels block
READI(0,0);READI(0,1);READI(0,2);
READI(1,0);READI(1,1);READI(1,2);
READI(2,0);READI(2,1);READI(2,2);
if(xid<2)
{// read last 2 columns border. each macro call reads 2*LSy pixels block
READI(0,3);
READI(1,3);
READI(2,3);
}
if(yid<2)
{// read last 2 row. each macro call reads LSx*2 pixels block
READI(3,0);READI(3,1);READI(3,2);
}
if(yid<2 && xid<2)
{// read right bottom 2x2 corner. one macro call reads 2*2 pixels block
READI(3,3);
}
barrier(CLK_LOCAL_MEM_FENCE);
}
#undef READI
__attribute__((reqd_work_group_size(LSx, LSy, 1)))
__kernel void lkSparse(image2d_t I, image2d_t J,
__global const float2* prevPts, __global float2* nextPts, __global uchar* status, __global float* err,
const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
{
__local float smem1[BUFFER];
__local float smem2[BUFFER];
__local float smem3[BUFFER];
int xid=get_local_id(0);
int yid=get_local_id(1);
int gid=get_group_id(0);
int xsize=get_local_size(0);
int ysize=get_local_size(1);
int k;
#ifdef CPU
float wx0 = 1.0f;
float wy0 = 1.0f;
int xBase = mad24(xsize, 2, xid);
int yBase = mad24(ysize, 2, yid);
float wx1 = (xBase < c_winSize_x) ? 1 : 0;
float wy1 = (yBase < c_winSize_y) ? 1 : 0;
#else
#if WSX == 1
float wx0 = 1.0f;
int xBase = mad24(xsize, 2, xid);
float wx1 = (xBase < c_winSize_x) ? 1 : 0;
#else
int xBase = mad24(xsize, 1, xid);
float wx0 = (xBase < c_winSize_x) ? 1 : 0;
float wx1 = 0.0f;
#endif
#if WSY == 1
float wy0 = 1.0f;
int yBase = mad24(ysize, 2, yid);
float wy1 = (yBase < c_winSize_y) ? 1 : 0;
#else
int yBase = mad24(ysize, 1, yid);
float wy0 = (yBase < c_winSize_y) ? 1 : 0;
float wy1 = 0.0f;
#endif
#endif
float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
const int tid = mad24(yid, xsize, xid);
float2 prevPt = prevPts[gid] / (float2)(1 << level);
if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
{
if (tid == 0 && level == 0)
{
status[gid] = 0;
}
return;
}
prevPt -= c_halfWin;
// extract the patch from the first image, compute covariation matrix of derivatives
float A11 = 0;
float A12 = 0;
float A22 = 0;
float I_patch[GRIDSIZE][GRIDSIZE];
float dIdx_patch[GRIDSIZE][GRIDSIZE];
float dIdy_patch[GRIDSIZE][GRIDSIZE];
// local memory to read image with border to calc sobels
local float IPatchLocal[LM_W*LM_H];
ReadPatchIToLocalMem(I,prevPt,IPatchLocal);
{
SetPatch(IPatchLocal, 0, 0,
&I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
&A11, &A12, &A22,1);
SetPatch(IPatchLocal, 0, 1,
&I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
&A11, &A12, &A22,wx0);
SetPatch(IPatchLocal, 0, 2,
&I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
&A11, &A12, &A22,wx1);
}
{
SetPatch(IPatchLocal, 1, 0,
&I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
&A11, &A12, &A22,wy0);
SetPatch(IPatchLocal, 1,1,
&I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
&A11, &A12, &A22,wx0*wy0);
SetPatch(IPatchLocal, 1,2,
&I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
&A11, &A12, &A22,wx1*wy0);
}
{
SetPatch(IPatchLocal, 2,0,
&I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
&A11, &A12, &A22,wy1);
SetPatch(IPatchLocal, 2,1,
&I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
&A11, &A12, &A22,wx0*wy1);
SetPatch(IPatchLocal, 2,2,
&I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
&A11, &A12, &A22,wx1*wy1);
}
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
A11 = smem1[0];
A12 = smem2[0];
A22 = smem3[0];
barrier(CLK_LOCAL_MEM_FENCE);
float D = mad(A11, A22, - A12 * A12);
if (D < 1.192092896e-07f)
{
if (tid == 0 && level == 0)
status[gid] = 0;
return;
}
A11 /= D;
A12 /= D;
A22 /= D;
prevPt = mad(nextPts[gid], 2.0f, - c_halfWin);
float2 offset0 = (float2)(xid + 0.5f, yid + 0.5f);
float2 offset1 = (float2)(xsize, ysize);
float2 loc0 = prevPt + offset0;
float2 loc1 = loc0 + offset1;
float2 loc2 = loc1 + offset1;
for (k = 0; k < c_iters; ++k)
{
if (prevPt.x < -c_halfWin.x || prevPt.x >= cols || prevPt.y < -c_halfWin.y || prevPt.y >= rows)
{
if (tid == 0 && level == 0)
status[gid] = 0;
break;
}
float b1 = 0;
float b2 = 0;
{
GetPatch(J, loc0.x, loc0.y,
&I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
&b1, &b2);
GetPatch(J, loc1.x, loc0.y,
&I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
&b1, &b2);
GetPatch(J, loc2.x, loc0.y,
&I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
&b1, &b2);
}
{
GetPatch(J, loc0.x, loc1.y,
&I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
&b1, &b2);
GetPatch(J, loc1.x, loc1.y,
&I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
&b1, &b2);
GetPatch(J, loc2.x, loc1.y,
&I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
&b1, &b2);
}
{
GetPatch(J, loc0.x, loc2.y,
&I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
&b1, &b2);
GetPatch(J, loc1.x, loc2.y,
&I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
&b1, &b2);
GetPatch(J, loc2.x, loc2.y,
&I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
&b1, &b2);
}
reduce2(b1, b2, smem1, smem2, tid);
b1 = smem1[0];
b2 = smem2[0];
barrier(CLK_LOCAL_MEM_FENCE);
float2 delta;
delta.x = mad(A12, b2, - A22 * b1) * 32.0f;
delta.y = mad(A12, b1, - A11 * b2) * 32.0f;
prevPt += delta;
loc0 += delta;
loc1 += delta;
loc2 += delta;
if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD)
break;
}
D = 0.0f;
if (calcErr)
{
{
GetError(J, loc0.x, loc0.y, &I_patch[0][0], &D, 1);
GetError(J, loc1.x, loc0.y, &I_patch[0][1], &D, wx0);
}
{
GetError(J, loc0.x, loc1.y, &I_patch[1][0], &D, wy0);
GetError(J, loc1.x, loc1.y, &I_patch[1][1], &D, wx0*wy0);
}
if(xBase < c_winSize_x)
{
GetError(J, loc2.x, loc0.y, &I_patch[0][2], &D, wx1);
GetError(J, loc2.x, loc1.y, &I_patch[1][2], &D, wx1*wy0);
}
if(yBase < c_winSize_y)
{
GetError(J, loc0.x, loc2.y, &I_patch[2][0], &D, wy1);
GetError(J, loc1.x, loc2.y, &I_patch[2][1], &D, wx0*wy1);
if(xBase < c_winSize_x)
GetError(J, loc2.x, loc2.y, &I_patch[2][2], &D, wx1*wy1);
}
reduce1(D, smem1, tid);
}
if (tid == 0)
{
prevPt += c_halfWin;
nextPts[gid] = prevPt;
if (calcErr)
err[gid] = smem1[0] / (float)(32 * c_winSize_x * c_winSize_y);
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,129 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include<iostream>
#include<fstream>
namespace cv {
const float FLOW_TAG_FLOAT = 202021.25f;
const char *FLOW_TAG_STRING = "PIEH";
CV_EXPORTS_W Mat readOpticalFlow( const String& path )
{
Mat_<Point2f> flow;
std::ifstream file(path.c_str(), std::ios_base::binary);
if ( !file.good() )
return std::move(flow); // no file - return empty matrix
float tag;
file.read((char*) &tag, sizeof(float));
if ( tag != FLOW_TAG_FLOAT )
return std::move(flow);
int width, height;
file.read((char*) &width, 4);
file.read((char*) &height, 4);
flow.create(height, width);
for ( int i = 0; i < flow.rows; ++i )
{
for ( int j = 0; j < flow.cols; ++j )
{
Point2f u;
file.read((char*) &u.x, sizeof(float));
file.read((char*) &u.y, sizeof(float));
if ( !file.good() )
{
flow.release();
return std::move(flow);
}
flow(i, j) = u;
}
}
file.close();
return std::move(flow);
}
CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow )
{
const int nChannels = 2;
Mat input = flow.getMat();
if ( input.channels() != nChannels || input.depth() != CV_32F || path.length() == 0 )
return false;
std::ofstream file(path.c_str(), std::ofstream::binary);
if ( !file.good() )
return false;
int nRows, nCols;
nRows = (int) input.size().height;
nCols = (int) input.size().width;
const int headerSize = 12;
char header[headerSize];
memcpy(header, FLOW_TAG_STRING, 4);
// size of ints is known - has been asserted in the current function
memcpy(header + 4, reinterpret_cast<const char*>(&nCols), sizeof(nCols));
memcpy(header + 8, reinterpret_cast<const char*>(&nRows), sizeof(nRows));
file.write(header, headerSize);
if ( !file.good() )
return false;
int row;
char* p;
for ( row = 0; row < nRows; row++ )
{
p = input.ptr<char>(row);
file.write(p, nCols * nChannels * sizeof(float));
if ( !file.good() )
return false;
}
file.close();
return true;
}
}

View File

@@ -0,0 +1,52 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_PRECOMP_H__
#define __OPENCV_PRECOMP_H__
#include "opencv2/video.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/ocl.hpp"
#include "opencv2/core.hpp"
#endif

File diff suppressed because it is too large Load Diff