add part of opencv

2020-01-27 20:20:56 +08:00
parent 0c4ac1d8bb
commit a71fa47620
6518 changed files with 3122580 additions and 0 deletions
--- a/Lib/opencv/sources/modules/video/src/bgfg_KNN.cpp
+++ b/Lib/opencv/sources/modules/video/src/bgfg_KNN.cpp
@@ -0,0 +1,881 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+//#include <math.h>
+
+#include "precomp.hpp"
+#include "opencl_kernels_video.hpp"
+
+namespace cv
+{
+
+/*!
+ The class implements the following algorithm:
+ "Efficient Adaptive Density Estimation per Image Pixel for the Task of Background Subtraction"
+ Z.Zivkovic, F. van der Heijden
+ Pattern Recognition Letters, vol. 27, no. 7, pages 773-780, 2006
+ http://www.zoranz.net/Publications/zivkovicPRL2006.pdf
+*/
+
+// default parameters of gaussian background detection algorithm
+static const int defaultHistory2 = 500; // Learning rate; alpha = 1/defaultHistory2
+static const int defaultNsamples = 7; // number of samples saved in memory
+static const float defaultDist2Threshold = 20.0f*20.0f;//threshold on distance from the sample
+
+// additional parameters
+static const unsigned char defaultnShadowDetection2 = (unsigned char)127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
+static const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation
+
+class BackgroundSubtractorKNNImpl CV_FINAL : public BackgroundSubtractorKNN
+{
+public:
+    //! the default constructor
+    BackgroundSubtractorKNNImpl()
+    {
+    frameSize = Size(0,0);
+    frameType = 0;
+    nframes = 0;
+    history = defaultHistory2;
+
+    //set parameters
+    // N - the number of samples stored in memory per model
+    nN = defaultNsamples;
+
+    //kNN - k nearest neighbour - number on NN for detecting background - default K=[0.1*nN]
+    nkNN=MAX(1,cvRound(0.1*nN*3+0.40));
+
+    //Tb - Threshold Tb*kernelwidth
+    fTb = defaultDist2Threshold;
+
+    // Shadow detection
+    bShadowDetection = 1;//turn on
+    nShadowDetection =  defaultnShadowDetection2;
+    fTau = defaultfTau;// Tau - shadow threshold
+    name_ = "BackgroundSubtractor.KNN";
+    nLongCounter = 0;
+    nMidCounter = 0;
+    nShortCounter = 0;
+#ifdef HAVE_OPENCL
+    opencl_ON = true;
+#endif
+    }
+    //! the full constructor that takes the length of the history,
+    // the number of gaussian mixtures, the background ratio parameter and the noise strength
+    BackgroundSubtractorKNNImpl(int _history,  float _dist2Threshold, bool _bShadowDetection=true)
+    {
+    frameSize = Size(0,0);
+    frameType = 0;
+
+    nframes = 0;
+    history = _history > 0 ? _history : defaultHistory2;
+
+    //set parameters
+    // N - the number of samples stored in memory per model
+    nN = defaultNsamples;
+    //kNN - k nearest neighbour - number on NN for detcting background - default K=[0.1*nN]
+    nkNN=MAX(1,cvRound(0.1*nN*3+0.40));
+
+    //Tb - Threshold Tb*kernelwidth
+    fTb = _dist2Threshold>0? _dist2Threshold : defaultDist2Threshold;
+
+    bShadowDetection = _bShadowDetection;
+    nShadowDetection =  defaultnShadowDetection2;
+    fTau = defaultfTau;
+    name_ = "BackgroundSubtractor.KNN";
+    nLongCounter = 0;
+    nMidCounter = 0;
+    nShortCounter = 0;
+#ifdef HAVE_OPENCL
+    opencl_ON = true;
+#endif
+    }
+    //! the destructor
+    ~BackgroundSubtractorKNNImpl() CV_OVERRIDE {}
+    //! the update operator
+    void apply(InputArray image, OutputArray fgmask, double learningRate) CV_OVERRIDE;
+
+    //! computes a background image which are the mean of all background gaussians
+    virtual void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE;
+
+    //! re-initialization method
+    void initialize(Size _frameSize, int _frameType)
+    {
+        frameSize = _frameSize;
+        frameType = _frameType;
+        nframes = 0;
+
+        int nchannels = CV_MAT_CN(frameType);
+        CV_Assert( nchannels <= CV_CN_MAX );
+
+        // Reserve memory for the model
+        int size=frameSize.height*frameSize.width;
+        //Reset counters
+        nShortCounter = 0;
+        nMidCounter = 0;
+        nLongCounter = 0;
+
+#ifdef HAVE_OPENCL
+        if (ocl::isOpenCLActivated() && opencl_ON)
+        {
+            create_ocl_apply_kernel();
+
+            kernel_getBg.create("getBackgroundImage2_kernel", ocl::video::bgfg_knn_oclsrc, format( "-D CN=%d -D NSAMPLES=%d", nchannels, nN));
+
+            if (kernel_apply.empty() || kernel_getBg.empty())
+                opencl_ON = false;
+        }
+        else opencl_ON = false;
+
+        if (opencl_ON)
+        {
+            u_flag.create(frameSize.height * nN * 3, frameSize.width, CV_8UC1);
+            u_flag.setTo(Scalar::all(0));
+
+            if (nchannels==3)
+                nchannels=4;
+            u_sample.create(frameSize.height * nN * 3, frameSize.width, CV_32FC(nchannels));
+            u_sample.setTo(Scalar::all(0));
+
+            u_aModelIndexShort.create(frameSize.height, frameSize.width, CV_8UC1);
+            u_aModelIndexShort.setTo(Scalar::all(0));
+            u_aModelIndexMid.create(frameSize.height, frameSize.width, CV_8UC1);
+            u_aModelIndexMid.setTo(Scalar::all(0));
+            u_aModelIndexLong.create(frameSize.height, frameSize.width, CV_8UC1);
+            u_aModelIndexLong.setTo(Scalar::all(0));
+
+            u_nNextShortUpdate.create(frameSize.height, frameSize.width, CV_8UC1);
+            u_nNextShortUpdate.setTo(Scalar::all(0));
+            u_nNextMidUpdate.create(frameSize.height, frameSize.width, CV_8UC1);
+            u_nNextMidUpdate.setTo(Scalar::all(0));
+            u_nNextLongUpdate.create(frameSize.height, frameSize.width, CV_8UC1);
+            u_nNextLongUpdate.setTo(Scalar::all(0));
+        }
+        else
+#endif
+        {
+            // for each sample of 3 speed pixel models each pixel bg model we store ...
+            // values + flag (nchannels+1 values)
+            bgmodel.create( 1,(nN * 3) * (nchannels+1)* size,CV_8U);
+            bgmodel = Scalar::all(0);
+
+            //index through the three circular lists
+            aModelIndexShort.create(1,size,CV_8U);
+            aModelIndexMid.create(1,size,CV_8U);
+            aModelIndexLong.create(1,size,CV_8U);
+            //when to update next
+            nNextShortUpdate.create(1,size,CV_8U);
+            nNextMidUpdate.create(1,size,CV_8U);
+            nNextLongUpdate.create(1,size,CV_8U);
+
+            aModelIndexShort = Scalar::all(0);//random? //((m_nN)*rand())/(RAND_MAX+1);//0...m_nN-1
+            aModelIndexMid = Scalar::all(0);
+            aModelIndexLong = Scalar::all(0);
+            nNextShortUpdate = Scalar::all(0);
+            nNextMidUpdate = Scalar::all(0);
+            nNextLongUpdate = Scalar::all(0);
+        }
+    }
+
+    virtual int getHistory() const CV_OVERRIDE { return history; }
+    virtual void setHistory(int _nframes) CV_OVERRIDE { history = _nframes; }
+
+    virtual int getNSamples() const CV_OVERRIDE { return nN; }
+    virtual void setNSamples(int _nN) CV_OVERRIDE { nN = _nN; }//needs reinitialization!
+
+    virtual int getkNNSamples() const CV_OVERRIDE { return nkNN; }
+    virtual void setkNNSamples(int _nkNN) CV_OVERRIDE { nkNN = _nkNN; }
+
+    virtual double getDist2Threshold() const CV_OVERRIDE { return fTb; }
+    virtual void setDist2Threshold(double _dist2Threshold) CV_OVERRIDE { fTb = (float)_dist2Threshold; }
+
+    virtual bool getDetectShadows() const CV_OVERRIDE { return bShadowDetection; }
+    virtual void setDetectShadows(bool detectshadows) CV_OVERRIDE
+    {
+        if (bShadowDetection == detectshadows)
+            return;
+        bShadowDetection = detectshadows;
+#ifdef HAVE_OPENCL
+        if (!kernel_apply.empty())
+        {
+            create_ocl_apply_kernel();
+            CV_Assert( !kernel_apply.empty() );
+        }
+#endif
+    }
+
+    virtual int getShadowValue() const CV_OVERRIDE { return nShadowDetection; }
+    virtual void setShadowValue(int value) CV_OVERRIDE { nShadowDetection = (uchar)value; }
+
+    virtual double getShadowThreshold() const CV_OVERRIDE { return fTau; }
+    virtual void setShadowThreshold(double value) CV_OVERRIDE { fTau = (float)value; }
+
+    virtual void write(FileStorage& fs) const CV_OVERRIDE
+    {
+        writeFormat(fs);
+        fs << "name" << name_
+        << "history" << history
+        << "nsamples" << nN
+        << "nKNN" << nkNN
+        << "dist2Threshold" << fTb
+        << "detectShadows" << (int)bShadowDetection
+        << "shadowValue" << (int)nShadowDetection
+        << "shadowThreshold" << fTau;
+    }
+
+    virtual void read(const FileNode& fn) CV_OVERRIDE
+    {
+        CV_Assert( (String)fn["name"] == name_ );
+        history = (int)fn["history"];
+        nN = (int)fn["nsamples"];
+        nkNN = (int)fn["nKNN"];
+        fTb = (float)fn["dist2Threshold"];
+        bShadowDetection = (int)fn["detectShadows"] != 0;
+        nShadowDetection = saturate_cast<uchar>((int)fn["shadowValue"]);
+        fTau = (float)fn["shadowThreshold"];
+    }
+
+protected:
+    Size frameSize;
+    int frameType;
+    int nframes;
+    /////////////////////////
+    //very important parameters - things you will change
+    ////////////////////////
+    int history;
+    //alpha=1/history - speed of update - if the time interval you want to average over is T
+    //set alpha=1/history. It is also useful at start to make T slowly increase
+    //from 1 until the desired T
+    float fTb;
+    //Tb - threshold on the squared distance from the sample used to decide if it is well described
+    //by the background model or not. A typical value could be 2 sigma
+    //and that is Tb=2*2*10*10 =400; where we take typical pixel level sigma=10
+
+    /////////////////////////
+    //less important parameters - things you might change but be careful
+    ////////////////////////
+    int nN;//totlal number of samples
+    int nkNN;//number on NN for detcting background - default K=[0.1*nN]
+
+    //shadow detection parameters
+    bool bShadowDetection;//default 1 - do shadow detection
+    unsigned char nShadowDetection;//do shadow detection - insert this value as the detection result - 127 default value
+    float fTau;
+    // Tau - shadow threshold. The shadow is detected if the pixel is darker
+    //version of the background. Tau is a threshold on how much darker the shadow can be.
+    //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
+    //See: Prati,Mikic,Trivedi,Cucchiara,"Detecting Moving Shadows...",IEEE PAMI,2003.
+
+    //model data
+    int nLongCounter;//circular counter
+    int nMidCounter;
+    int nShortCounter;
+    Mat bgmodel; // model data pixel values
+    Mat aModelIndexShort;// index into the models
+    Mat aModelIndexMid;
+    Mat aModelIndexLong;
+    Mat nNextShortUpdate;//random update points per model
+    Mat nNextMidUpdate;
+    Mat nNextLongUpdate;
+
+#ifdef HAVE_OPENCL
+    mutable bool opencl_ON;
+
+    UMat u_flag;
+    UMat u_sample;
+    UMat u_aModelIndexShort;
+    UMat u_aModelIndexMid;
+    UMat u_aModelIndexLong;
+    UMat u_nNextShortUpdate;
+    UMat u_nNextMidUpdate;
+    UMat u_nNextLongUpdate;
+
+    mutable ocl::Kernel kernel_apply;
+    mutable ocl::Kernel kernel_getBg;
+#endif
+
+    String name_;
+
+#ifdef HAVE_OPENCL
+    bool ocl_getBackgroundImage(OutputArray backgroundImage) const;
+    bool ocl_apply(InputArray _image, OutputArray _fgmask, double learningRate=-1);
+    void create_ocl_apply_kernel();
+#endif
+};
+
+CV_INLINE void
+        _cvUpdatePixelBackgroundNP(int x_idx, const uchar* data, int nchannels, int m_nN,
+        uchar* m_aModel,
+        uchar* m_nNextLongUpdate,
+        uchar* m_nNextMidUpdate,
+        uchar* m_nNextShortUpdate,
+        uchar* m_aModelIndexLong,
+        uchar* m_aModelIndexMid,
+        uchar* m_aModelIndexShort,
+        int m_nLongCounter,
+        int m_nMidCounter,
+        int m_nShortCounter,
+        uchar include
+        )
+{
+    // hold the offset
+    int ndata=1+nchannels;
+    long offsetLong =  ndata * (m_aModelIndexLong[x_idx] + m_nN * 2);
+    long offsetMid =   ndata * (m_aModelIndexMid[x_idx]  + m_nN * 1);
+    long offsetShort = ndata * (m_aModelIndexShort[x_idx]);
+
+    // Long update?
+    if (m_nNextLongUpdate[x_idx] == m_nLongCounter)
+    {
+        // add the oldest pixel from Mid to the list of values (for each color)
+        memcpy(&m_aModel[offsetLong],&m_aModel[offsetMid],ndata*sizeof(unsigned char));
+        // increase the index
+        m_aModelIndexLong[x_idx] = (m_aModelIndexLong[x_idx] >= (m_nN-1)) ? 0 : (m_aModelIndexLong[x_idx] + 1);
+    };
+
+    // Mid update?
+    if (m_nNextMidUpdate[x_idx] == m_nMidCounter)
+    {
+        // add this pixel to the list of values (for each color)
+        memcpy(&m_aModel[offsetMid],&m_aModel[offsetShort],ndata*sizeof(unsigned char));
+        // increase the index
+        m_aModelIndexMid[x_idx] = (m_aModelIndexMid[x_idx] >= (m_nN-1)) ? 0 : (m_aModelIndexMid[x_idx] + 1);
+    };
+
+    // Short update?
+    if (m_nNextShortUpdate[x_idx] == m_nShortCounter)
+    {
+        // add this pixel to the list of values (for each color)
+        memcpy(&m_aModel[offsetShort],data,nchannels*sizeof(unsigned char));
+        //set the include flag
+        m_aModel[offsetShort+nchannels]=include;
+        // increase the index
+        m_aModelIndexShort[x_idx] = (m_aModelIndexShort[x_idx] >= (m_nN-1)) ? 0 : (m_aModelIndexShort[x_idx] + 1);
+    };
+}
+
+CV_INLINE int
+        _cvCheckPixelBackgroundNP(const uchar* data, int nchannels,
+        int m_nN,
+        uchar* m_aModel,
+        float m_fTb,
+        int m_nkNN,
+        float tau,
+        bool m_bShadowDetection,
+        uchar& include)
+{
+    int Pbf = 0; // the total probability that this pixel is background
+    int Pb = 0; //background model probability
+    float dData[CV_CN_MAX];
+
+    //uchar& include=data[nchannels];
+    include=0;//do we include this pixel into background model?
+
+    int ndata=nchannels+1;
+    // now increase the probability for each pixel
+    for (int n = 0; n < m_nN*3; n++)
+    {
+        uchar* mean_m = &m_aModel[n*ndata];
+
+        //calculate difference and distance
+        float dist2;
+
+        if( nchannels == 3 )
+        {
+            dData[0] = (float)mean_m[0] - data[0];
+            dData[1] = (float)mean_m[1] - data[1];
+            dData[2] = (float)mean_m[2] - data[2];
+            dist2 = dData[0]*dData[0] + dData[1]*dData[1] + dData[2]*dData[2];
+        }
+        else
+        {
+            dist2 = 0.f;
+            for( int c = 0; c < nchannels; c++ )
+            {
+                dData[c] = (float)mean_m[c] - data[c];
+                dist2 += dData[c]*dData[c];
+            }
+        }
+
+        if (dist2<m_fTb)
+        {
+            Pbf++;//all
+            //background only
+            //if(m_aModel[subPosPixel + nchannels])//indicator
+            if(mean_m[nchannels])//indicator
+            {
+                Pb++;
+                if (Pb >= m_nkNN)//Tb
+                {
+                    include=1;//include
+                    return 1;//background ->exit
+                };
+            }
+        };
+    };
+
+    //include?
+    if (Pbf>=m_nkNN)//m_nTbf)
+    {
+        include=1;
+    }
+
+    int Ps = 0; // the total probability that this pixel is background shadow
+    // Detected as moving object, perform shadow detection
+    if (m_bShadowDetection)
+    {
+        for (int n = 0; n < m_nN*3; n++)
+        {
+            //long subPosPixel = posPixel + n*ndata;
+            uchar* mean_m = &m_aModel[n*ndata];
+
+            if(mean_m[nchannels])//check only background
+            {
+                float numerator = 0.0f;
+                float denominator = 0.0f;
+                for( int c = 0; c < nchannels; c++ )
+                {
+                    numerator   += (float)data[c] * mean_m[c];
+                    denominator += (float)mean_m[c] * mean_m[c];
+                }
+
+                // no division by zero allowed
+                if( denominator == 0 )
+                    return 0;
+
+                // if tau < a < 1 then also check the color distortion
+                if( numerator <= denominator && numerator >= tau*denominator )
+                {
+                    float a = numerator / denominator;
+                    float dist2a = 0.0f;
+
+                    for( int c = 0; c < nchannels; c++ )
+                    {
+                        float dD= a*mean_m[c] - data[c];
+                        dist2a += dD*dD;
+                    }
+
+                    if (dist2a<m_fTb*a*a)
+                    {
+                        Ps++;
+                        if (Ps >= m_nkNN)//shadow
+                            return 2;
+                    };
+                };
+            };
+        };
+    }
+    return 0;
+}
+
+class KNNInvoker : public ParallelLoopBody
+{
+public:
+    KNNInvoker(const Mat& _src, Mat& _dst,
+               uchar* _bgmodel,
+               uchar* _nNextLongUpdate,
+               uchar* _nNextMidUpdate,
+               uchar* _nNextShortUpdate,
+               uchar* _aModelIndexLong,
+               uchar* _aModelIndexMid,
+               uchar* _aModelIndexShort,
+               int _nLongCounter,
+               int _nMidCounter,
+               int _nShortCounter,
+               int _nN,
+               float _fTb,
+               int _nkNN,
+               float _fTau,
+               bool _bShadowDetection,
+               uchar _nShadowDetection)
+    {
+        src = &_src;
+        dst = &_dst;
+        m_aModel0 = _bgmodel;
+        m_nNextLongUpdate0 = _nNextLongUpdate;
+        m_nNextMidUpdate0 = _nNextMidUpdate;
+        m_nNextShortUpdate0 = _nNextShortUpdate;
+        m_aModelIndexLong0 = _aModelIndexLong;
+        m_aModelIndexMid0 = _aModelIndexMid;
+        m_aModelIndexShort0 = _aModelIndexShort;
+        m_nLongCounter = _nLongCounter;
+        m_nMidCounter = _nMidCounter;
+        m_nShortCounter = _nShortCounter;
+        m_nN = _nN;
+        m_fTb = _fTb;
+        m_fTau = _fTau;
+        m_nkNN = _nkNN;
+        m_bShadowDetection = _bShadowDetection;
+        m_nShadowDetection = _nShadowDetection;
+    }
+
+    void operator()(const Range& range) const CV_OVERRIDE
+    {
+        int y0 = range.start, y1 = range.end;
+        int ncols = src->cols, nchannels = src->channels();
+        int ndata=nchannels+1;
+
+        for ( int y = y0; y < y1; y++ )
+        {
+            const uchar* data = src->ptr(y);
+            uchar* m_aModel = m_aModel0 + ncols*m_nN*3*ndata*y;
+            uchar* m_nNextLongUpdate = m_nNextLongUpdate0 + ncols*y;
+            uchar* m_nNextMidUpdate = m_nNextMidUpdate0 + ncols*y;
+            uchar* m_nNextShortUpdate = m_nNextShortUpdate0 + ncols*y;
+            uchar* m_aModelIndexLong = m_aModelIndexLong0 + ncols*y;
+            uchar* m_aModelIndexMid = m_aModelIndexMid0 + ncols*y;
+            uchar* m_aModelIndexShort = m_aModelIndexShort0 + ncols*y;
+            uchar* mask = dst->ptr(y);
+
+            for ( int x = 0; x < ncols; x++ )
+            {
+
+                //update model+ background subtract
+                uchar include=0;
+                int result= _cvCheckPixelBackgroundNP(data, nchannels,
+                        m_nN, m_aModel, m_fTb,m_nkNN, m_fTau,m_bShadowDetection,include);
+
+                _cvUpdatePixelBackgroundNP(x,data,nchannels,
+                        m_nN, m_aModel,
+                        m_nNextLongUpdate,
+                        m_nNextMidUpdate,
+                        m_nNextShortUpdate,
+                        m_aModelIndexLong,
+                        m_aModelIndexMid,
+                        m_aModelIndexShort,
+                        m_nLongCounter,
+                        m_nMidCounter,
+                        m_nShortCounter,
+                        include
+                        );
+                switch (result)
+                {
+                    case 0:
+                        //foreground
+                        mask[x] = 255;
+                        break;
+                    case 1:
+                        //background
+                        mask[x] = 0;
+                        break;
+                    case 2:
+                        //shadow
+                        mask[x] = m_nShadowDetection;
+                        break;
+                }
+                data += nchannels;
+                m_aModel += m_nN*3*ndata;
+            }
+        }
+    }
+
+    const Mat* src;
+    Mat* dst;
+    uchar* m_aModel0;
+    uchar* m_nNextLongUpdate0;
+    uchar* m_nNextMidUpdate0;
+    uchar* m_nNextShortUpdate0;
+    uchar* m_aModelIndexLong0;
+    uchar* m_aModelIndexMid0;
+    uchar* m_aModelIndexShort0;
+    int m_nLongCounter;
+    int m_nMidCounter;
+    int m_nShortCounter;
+    int m_nN;
+    float m_fTb;
+    float m_fTau;
+    int m_nkNN;
+    bool m_bShadowDetection;
+    uchar m_nShadowDetection;
+};
+
+#ifdef HAVE_OPENCL
+bool BackgroundSubtractorKNNImpl::ocl_apply(InputArray _image, OutputArray _fgmask, double learningRate)
+{
+    bool needToInitialize = nframes == 0 || learningRate >= 1 || _image.size() != frameSize || _image.type() != frameType;
+
+    if( needToInitialize )
+        initialize(_image.size(), _image.type());
+
+    ++nframes;
+    learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./std::min( 2*nframes, history );
+    CV_Assert(learningRate >= 0);
+
+    _fgmask.create(_image.size(), CV_8U);
+    UMat fgmask = _fgmask.getUMat();
+
+    UMat frame = _image.getUMat();
+
+    //recalculate update rates - in case alpha is changed
+    // calculate update parameters (using alpha)
+    int Kshort,Kmid,Klong;
+    //approximate exponential learning curve
+    Kshort=(int)(log(0.7)/log(1-learningRate))+1;//Kshort
+    Kmid=(int)(log(0.4)/log(1-learningRate))-Kshort+1;//Kmid
+    Klong=(int)(log(0.1)/log(1-learningRate))-Kshort-Kmid+1;//Klong
+
+    //refresh rates
+    int nShortUpdate = (Kshort/nN)+1;
+    int nMidUpdate = (Kmid/nN)+1;
+    int nLongUpdate = (Klong/nN)+1;
+
+    int idxArg = 0;
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::ReadOnly(frame));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadOnly(u_nNextLongUpdate));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadOnly(u_nNextMidUpdate));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadOnly(u_nNextShortUpdate));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_aModelIndexLong));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_aModelIndexMid));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_aModelIndexShort));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_flag));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_sample));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::WriteOnlyNoSize(fgmask));
+
+    idxArg = kernel_apply.set(idxArg, nLongCounter);
+    idxArg = kernel_apply.set(idxArg, nMidCounter);
+    idxArg = kernel_apply.set(idxArg, nShortCounter);
+    idxArg = kernel_apply.set(idxArg, fTb);
+    idxArg = kernel_apply.set(idxArg, nkNN);
+    idxArg = kernel_apply.set(idxArg, fTau);
+    if (bShadowDetection)
+        kernel_apply.set(idxArg, nShadowDetection);
+
+    size_t globalsize[2] = {(size_t)frame.cols, (size_t)frame.rows};
+    if(!kernel_apply.run(2, globalsize, NULL, true))
+        return false;
+
+    nShortCounter++;//0,1,...,nShortUpdate-1
+    nMidCounter++;
+    nLongCounter++;
+    if (nShortCounter >= nShortUpdate)
+    {
+        nShortCounter = 0;
+        randu(u_nNextShortUpdate, Scalar::all(0),  Scalar::all(nShortUpdate));
+    }
+    if (nMidCounter >= nMidUpdate)
+    {
+        nMidCounter = 0;
+        randu(u_nNextMidUpdate, Scalar::all(0),  Scalar::all(nMidUpdate));
+    }
+    if (nLongCounter >= nLongUpdate)
+    {
+        nLongCounter = 0;
+        randu(u_nNextLongUpdate, Scalar::all(0),  Scalar::all(nLongUpdate));
+    }
+    return true;
+}
+
+bool BackgroundSubtractorKNNImpl::ocl_getBackgroundImage(OutputArray _backgroundImage) const
+{
+    _backgroundImage.create(frameSize, frameType);
+    UMat dst = _backgroundImage.getUMat();
+
+    int idxArg = 0;
+    idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_flag));
+    idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_sample));
+    idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::WriteOnly(dst));
+
+    size_t globalsize[2] = {(size_t)dst.cols, (size_t)dst.rows};
+
+    return kernel_getBg.run(2, globalsize, NULL, false);
+}
+
+void BackgroundSubtractorKNNImpl::create_ocl_apply_kernel()
+{
+    int nchannels = CV_MAT_CN(frameType);
+    String opts = format("-D CN=%d -D NSAMPLES=%d%s", nchannels, nN, bShadowDetection ? " -D SHADOW_DETECT" : "");
+    kernel_apply.create("knn_kernel", ocl::video::bgfg_knn_oclsrc, opts);
+}
+
+#endif
+
+void BackgroundSubtractorKNNImpl::apply(InputArray _image, OutputArray _fgmask, double learningRate)
+{
+    CV_INSTRUMENT_REGION();
+
+#ifdef HAVE_OPENCL
+    if (opencl_ON)
+    {
+#ifndef __APPLE__
+        CV_OCL_RUN(_fgmask.isUMat() && OCL_PERFORMANCE_CHECK(!ocl::Device::getDefault().isIntel() || _image.channels() == 1),
+                   ocl_apply(_image, _fgmask, learningRate))
+#else
+        CV_OCL_RUN(_fgmask.isUMat() && OCL_PERFORMANCE_CHECK(!ocl::Device::getDefault().isIntel()),
+                   ocl_apply(_image, _fgmask, learningRate))
+#endif
+
+        opencl_ON = false;
+        nframes = 0;
+    }
+#endif
+
+    bool needToInitialize = nframes == 0 || learningRate >= 1 || _image.size() != frameSize || _image.type() != frameType;
+
+    if( needToInitialize )
+        initialize(_image.size(), _image.type());
+
+    Mat image = _image.getMat();
+    _fgmask.create( image.size(), CV_8U );
+    Mat fgmask = _fgmask.getMat();
+
+    ++nframes;
+    learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./std::min( 2*nframes, history );
+    CV_Assert(learningRate >= 0);
+
+    //recalculate update rates - in case alpha is changed
+    // calculate update parameters (using alpha)
+    int Kshort,Kmid,Klong;
+    //approximate exponential learning curve
+    Kshort=(int)(log(0.7)/log(1-learningRate))+1;//Kshort
+    Kmid=(int)(log(0.4)/log(1-learningRate))-Kshort+1;//Kmid
+    Klong=(int)(log(0.1)/log(1-learningRate))-Kshort-Kmid+1;//Klong
+
+    //refresh rates
+    int nShortUpdate = (Kshort/nN)+1;
+    int nMidUpdate = (Kmid/nN)+1;
+    int nLongUpdate = (Klong/nN)+1;
+
+    parallel_for_(Range(0, image.rows),
+                  KNNInvoker(image, fgmask,
+                             bgmodel.ptr(),
+                             nNextLongUpdate.ptr(),
+                             nNextMidUpdate.ptr(),
+                             nNextShortUpdate.ptr(),
+                             aModelIndexLong.ptr(),
+                             aModelIndexMid.ptr(),
+                             aModelIndexShort.ptr(),
+                             nLongCounter,
+                             nMidCounter,
+                             nShortCounter,
+                             nN,
+                             fTb,
+                             nkNN,
+                             fTau,
+                             bShadowDetection,
+                             nShadowDetection),
+                             image.total()/(double)(1 << 16));
+
+    nShortCounter++;//0,1,...,nShortUpdate-1
+    nMidCounter++;
+    nLongCounter++;
+    if (nShortCounter >= nShortUpdate)
+    {
+        nShortCounter = 0;
+        randu(nNextShortUpdate, Scalar::all(0),  Scalar::all(nShortUpdate));
+    }
+    if (nMidCounter >= nMidUpdate)
+    {
+        nMidCounter = 0;
+        randu(nNextMidUpdate, Scalar::all(0),  Scalar::all(nMidUpdate));
+    }
+    if (nLongCounter >= nLongUpdate)
+    {
+        nLongCounter = 0;
+        randu(nNextLongUpdate, Scalar::all(0),  Scalar::all(nLongUpdate));
+    }
+}
+
+void BackgroundSubtractorKNNImpl::getBackgroundImage(OutputArray backgroundImage) const
+{
+    CV_INSTRUMENT_REGION();
+
+#ifdef HAVE_OPENCL
+    if (opencl_ON)
+    {
+        CV_OCL_RUN(opencl_ON, ocl_getBackgroundImage(backgroundImage))
+
+        opencl_ON = false;
+    }
+#endif
+
+    int nchannels = CV_MAT_CN(frameType);
+    //CV_Assert( nchannels == 3 );
+    Mat meanBackground(frameSize, CV_8UC3, Scalar::all(0));
+
+    int ndata=nchannels+1;
+    int modelstep=(ndata * nN * 3);
+
+    const uchar* pbgmodel=bgmodel.ptr(0);
+    for(int row=0; row<meanBackground.rows; row++)
+    {
+        for(int col=0; col<meanBackground.cols; col++)
+        {
+            for (int n = 0; n < nN*3; n++)
+            {
+                const uchar* mean_m = &pbgmodel[n*ndata];
+                if (mean_m[nchannels])
+                {
+                    meanBackground.at<Vec3b>(row, col) = Vec3b(mean_m);
+                    break;
+                }
+            }
+            pbgmodel=pbgmodel+modelstep;
+        }
+    }
+
+    switch(CV_MAT_CN(frameType))
+    {
+        case 1:
+        {
+            std::vector<Mat> channels;
+            split(meanBackground, channels);
+            channels[0].copyTo(backgroundImage);
+            break;
+        }
+        case 3:
+        {
+            meanBackground.copyTo(backgroundImage);
+            break;
+        }
+        default:
+            CV_Error(Error::StsUnsupportedFormat, "");
+    }
+}
+
+
+Ptr<BackgroundSubtractorKNN> createBackgroundSubtractorKNN(int _history, double _threshold2,
+                                                           bool _bShadowDetection)
+{
+    return makePtr<BackgroundSubtractorKNNImpl>(_history, (float)_threshold2, _bShadowDetection);
+}
+
+}
+
+/* End of file. */
--- a/Lib/opencv/sources/modules/video/src/bgfg_gaussmix2.cpp
+++ b/Lib/opencv/sources/modules/video/src/bgfg_gaussmix2.cpp
@@ -0,0 +1,965 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+/*//Implementation of the Gaussian mixture model background subtraction from:
+//
+//"Improved adaptive Gaussian mixture model for background subtraction"
+//Z.Zivkovic
+//International Conference Pattern Recognition, UK, August, 2004
+//http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
+//The code is very fast and performs also shadow detection.
+//Number of Gausssian components is adapted per pixel.
+//
+// and
+//
+//"Efficient Adaptive Density Estimapion per Image Pixel for the Task of Background Subtraction"
+//Z.Zivkovic, F. van der Heijden
+//Pattern Recognition Letters, vol. 27, no. 7, pages 773-780, 2006.
+//
+//The algorithm similar to the standard Stauffer&Grimson algorithm with
+//additional selection of the number of the Gaussian components based on:
+//
+//"Recursive unsupervised learning of finite mixture models "
+//Z.Zivkovic, F.van der Heijden
+//IEEE Trans. on Pattern Analysis and Machine Intelligence, vol.26, no.5, pages 651-656, 2004
+//http://www.zoranz.net/Publications/zivkovic2004PAMI.pdf
+//
+//
+//Example usage with as cpp class
+// BackgroundSubtractorMOG2 bg_model;
+//For each new image the model is updates using:
+// bg_model(img, fgmask);
+//
+//Example usage as part of the CvBGStatModel:
+// CvBGStatModel* bg_model = cvCreateGaussianBGModel2( first_frame );
+//
+// //update for each frame
+// cvUpdateBGStatModel( tmp_frame, bg_model );//segmentation result is in bg_model->foreground
+//
+// //release at the program termination
+// cvReleaseBGStatModel( &bg_model );
+//
+//Author: Z.Zivkovic, www.zoranz.net
+//Date: 7-April-2011, Version:1.0
+///////////*/
+
+#include "precomp.hpp"
+#include "opencl_kernels_video.hpp"
+
+namespace cv
+{
+
+/*
+ Interface of Gaussian mixture algorithm from:
+
+ "Improved adaptive Gaussian mixture model for background subtraction"
+ Z.Zivkovic
+ International Conference Pattern Recognition, UK, August, 2004
+ http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
+
+ Advantages:
+ -fast - number of Gausssian components is constantly adapted per pixel.
+ -performs also shadow detection (see bgfg_segm_test.cpp example)
+
+*/
+
+// default parameters of gaussian background detection algorithm
+static const int defaultHistory2 = 500; // Learning rate; alpha = 1/defaultHistory2
+static const float defaultVarThreshold2 = 4.0f*4.0f;
+static const int defaultNMixtures2 = 5; // maximal number of Gaussians in mixture
+static const float defaultBackgroundRatio2 = 0.9f; // threshold sum of weights for background test
+static const float defaultVarThresholdGen2 = 3.0f*3.0f;
+static const float defaultVarInit2 = 15.0f; // initial variance for new components
+static const float defaultVarMax2 = 5*defaultVarInit2;
+static const float defaultVarMin2 = 4.0f;
+
+// additional parameters
+static const float defaultfCT2 = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components
+static const unsigned char defaultnShadowDetection2 = (unsigned char)127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
+static const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation
+
+
+class BackgroundSubtractorMOG2Impl CV_FINAL : public BackgroundSubtractorMOG2
+{
+public:
+    //! the default constructor
+    BackgroundSubtractorMOG2Impl()
+    {
+        frameSize = Size(0,0);
+        frameType = 0;
+
+        nframes = 0;
+        history = defaultHistory2;
+        varThreshold = defaultVarThreshold2;
+        bShadowDetection = 1;
+
+        nmixtures = defaultNMixtures2;
+        backgroundRatio = defaultBackgroundRatio2;
+        fVarInit = defaultVarInit2;
+        fVarMax  = defaultVarMax2;
+        fVarMin = defaultVarMin2;
+
+        varThresholdGen = defaultVarThresholdGen2;
+        fCT = defaultfCT2;
+        nShadowDetection =  defaultnShadowDetection2;
+        fTau = defaultfTau;
+#ifdef HAVE_OPENCL
+        opencl_ON = true;
+#endif
+    }
+    //! the full constructor that takes the length of the history,
+    // the number of gaussian mixtures, the background ratio parameter and the noise strength
+    BackgroundSubtractorMOG2Impl(int _history,  float _varThreshold, bool _bShadowDetection=true)
+    {
+        frameSize = Size(0,0);
+        frameType = 0;
+
+        nframes = 0;
+        history = _history > 0 ? _history : defaultHistory2;
+        varThreshold = (_varThreshold>0)? _varThreshold : defaultVarThreshold2;
+        bShadowDetection = _bShadowDetection;
+
+        nmixtures = defaultNMixtures2;
+        backgroundRatio = defaultBackgroundRatio2;
+        fVarInit = defaultVarInit2;
+        fVarMax  = defaultVarMax2;
+        fVarMin = defaultVarMin2;
+
+        varThresholdGen = defaultVarThresholdGen2;
+        fCT = defaultfCT2;
+        nShadowDetection =  defaultnShadowDetection2;
+        fTau = defaultfTau;
+        name_ = "BackgroundSubtractor.MOG2";
+#ifdef HAVE_OPENCL
+        opencl_ON = true;
+#endif
+    }
+    //! the destructor
+    ~BackgroundSubtractorMOG2Impl() CV_OVERRIDE {}
+    //! the update operator
+    void apply(InputArray image, OutputArray fgmask, double learningRate) CV_OVERRIDE;
+
+    //! computes a background image which are the mean of all background gaussians
+    virtual void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE;
+
+    //! re-initiaization method
+    void initialize(Size _frameSize, int _frameType)
+    {
+        frameSize = _frameSize;
+        frameType = _frameType;
+        nframes = 0;
+
+        int nchannels = CV_MAT_CN(frameType);
+        CV_Assert( nchannels <= CV_CN_MAX );
+        CV_Assert( nmixtures <= 255);
+
+#ifdef HAVE_OPENCL
+        if (ocl::isOpenCLActivated() && opencl_ON)
+        {
+            create_ocl_apply_kernel();
+
+            bool isFloat = CV_MAKETYPE(CV_32F,nchannels) == frameType;
+            kernel_getBg.create("getBackgroundImage2_kernel", ocl::video::bgfg_mog2_oclsrc, format( "-D CN=%d -D FL=%d -D NMIXTURES=%d", nchannels, isFloat, nmixtures));
+
+            if (kernel_apply.empty() || kernel_getBg.empty())
+                opencl_ON = false;
+        }
+        else opencl_ON = false;
+
+        if (opencl_ON)
+        {
+            u_weight.create(frameSize.height * nmixtures, frameSize.width, CV_32FC1);
+            u_weight.setTo(Scalar::all(0));
+
+            u_variance.create(frameSize.height * nmixtures, frameSize.width, CV_32FC1);
+            u_variance.setTo(Scalar::all(0));
+
+            if (nchannels==3)
+                nchannels=4;
+            u_mean.create(frameSize.height * nmixtures, frameSize.width, CV_32FC(nchannels)); //4 channels
+            u_mean.setTo(Scalar::all(0));
+
+            //make the array for keeping track of the used modes per pixel - all zeros at start
+            u_bgmodelUsedModes.create(frameSize, CV_8UC1);
+            u_bgmodelUsedModes.setTo(cv::Scalar::all(0));
+        }
+        else
+#endif
+        {
+            // for each gaussian mixture of each pixel bg model we store ...
+            // the mixture weight (w),
+            // the mean (nchannels values) and
+            // the covariance
+            bgmodel.create( 1, frameSize.height*frameSize.width*nmixtures*(2 + nchannels), CV_32F );
+            //make the array for keeping track of the used modes per pixel - all zeros at start
+            bgmodelUsedModes.create(frameSize,CV_8U);
+            bgmodelUsedModes = Scalar::all(0);
+        }
+    }
+
+    virtual int getHistory() const CV_OVERRIDE { return history; }
+    virtual void setHistory(int _nframes) CV_OVERRIDE { history = _nframes; }
+
+    virtual int getNMixtures() const CV_OVERRIDE { return nmixtures; }
+    virtual void setNMixtures(int nmix) CV_OVERRIDE { nmixtures = nmix; }
+
+    virtual double getBackgroundRatio() const CV_OVERRIDE { return backgroundRatio; }
+    virtual void setBackgroundRatio(double _backgroundRatio) CV_OVERRIDE { backgroundRatio = (float)_backgroundRatio; }
+
+    virtual double getVarThreshold() const CV_OVERRIDE { return varThreshold; }
+    virtual void setVarThreshold(double _varThreshold) CV_OVERRIDE { varThreshold = _varThreshold; }
+
+    virtual double getVarThresholdGen() const CV_OVERRIDE { return varThresholdGen; }
+    virtual void setVarThresholdGen(double _varThresholdGen) CV_OVERRIDE { varThresholdGen = (float)_varThresholdGen; }
+
+    virtual double getVarInit() const CV_OVERRIDE { return fVarInit; }
+    virtual void setVarInit(double varInit) CV_OVERRIDE { fVarInit = (float)varInit; }
+
+    virtual double getVarMin() const CV_OVERRIDE { return fVarMin; }
+    virtual void setVarMin(double varMin) CV_OVERRIDE { fVarMin = (float)varMin; }
+
+    virtual double getVarMax() const CV_OVERRIDE { return fVarMax; }
+    virtual void setVarMax(double varMax) CV_OVERRIDE { fVarMax = (float)varMax; }
+
+    virtual double getComplexityReductionThreshold() const CV_OVERRIDE { return fCT; }
+    virtual void setComplexityReductionThreshold(double ct) CV_OVERRIDE { fCT = (float)ct; }
+
+    virtual bool getDetectShadows() const CV_OVERRIDE { return bShadowDetection; }
+    virtual void setDetectShadows(bool detectshadows) CV_OVERRIDE
+    {
+        if (bShadowDetection == detectshadows)
+            return;
+        bShadowDetection = detectshadows;
+#ifdef HAVE_OPENCL
+        if (!kernel_apply.empty())
+        {
+            create_ocl_apply_kernel();
+            CV_Assert( !kernel_apply.empty() );
+        }
+#endif
+    }
+
+    virtual int getShadowValue() const CV_OVERRIDE { return nShadowDetection; }
+    virtual void setShadowValue(int value) CV_OVERRIDE { nShadowDetection = (uchar)value; }
+
+    virtual double getShadowThreshold() const CV_OVERRIDE { return fTau; }
+    virtual void setShadowThreshold(double value) CV_OVERRIDE { fTau = (float)value; }
+
+    virtual void write(FileStorage& fs) const CV_OVERRIDE
+    {
+        writeFormat(fs);
+        fs << "name" << name_
+        << "history" << history
+        << "nmixtures" << nmixtures
+        << "backgroundRatio" << backgroundRatio
+        << "varThreshold" << varThreshold
+        << "varThresholdGen" << varThresholdGen
+        << "varInit" << fVarInit
+        << "varMin" << fVarMin
+        << "varMax" << fVarMax
+        << "complexityReductionThreshold" << fCT
+        << "detectShadows" << (int)bShadowDetection
+        << "shadowValue" << (int)nShadowDetection
+        << "shadowThreshold" << fTau;
+    }
+
+    virtual void read(const FileNode& fn) CV_OVERRIDE
+    {
+        CV_Assert( (String)fn["name"] == name_ );
+        history = (int)fn["history"];
+        nmixtures = (int)fn["nmixtures"];
+        backgroundRatio = (float)fn["backgroundRatio"];
+        varThreshold = (double)fn["varThreshold"];
+        varThresholdGen = (float)fn["varThresholdGen"];
+        fVarInit = (float)fn["varInit"];
+        fVarMin = (float)fn["varMin"];
+        fVarMax = (float)fn["varMax"];
+        fCT = (float)fn["complexityReductionThreshold"];
+        bShadowDetection = (int)fn["detectShadows"] != 0;
+        nShadowDetection = saturate_cast<uchar>((int)fn["shadowValue"]);
+        fTau = (float)fn["shadowThreshold"];
+    }
+
+protected:
+    Size frameSize;
+    int frameType;
+    Mat bgmodel;
+    Mat bgmodelUsedModes;//keep track of number of modes per pixel
+
+#ifdef HAVE_OPENCL
+    //for OCL
+
+    mutable bool opencl_ON;
+
+    UMat u_weight;
+    UMat u_variance;
+    UMat u_mean;
+    UMat u_bgmodelUsedModes;
+
+    mutable ocl::Kernel kernel_apply;
+    mutable ocl::Kernel kernel_getBg;
+#endif
+
+    int nframes;
+    int history;
+    int nmixtures;
+    //! here it is the maximum allowed number of mixture components.
+    //! Actual number is determined dynamically per pixel
+    double varThreshold;
+    // threshold on the squared Mahalanobis distance to decide if it is well described
+    // by the background model or not. Related to Cthr from the paper.
+    // This does not influence the update of the background. A typical value could be 4 sigma
+    // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
+
+    /////////////////////////
+    // less important parameters - things you might change but be careful
+    ////////////////////////
+    float backgroundRatio;
+    // corresponds to fTB=1-cf from the paper
+    // TB - threshold when the component becomes significant enough to be included into
+    // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.
+    // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
+    // it is considered foreground
+    // float noiseSigma;
+    float varThresholdGen;
+    //correspondts to Tg - threshold on the squared Mahalan. dist. to decide
+    //when a sample is close to the existing components. If it is not close
+    //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
+    //Smaller Tg leads to more generated components and higher Tg might make
+    //lead to small number of components but they can grow too large
+    float fVarInit;
+    float fVarMin;
+    float fVarMax;
+    //initial variance  for the newly generated components.
+    //It will will influence the speed of adaptation. A good guess should be made.
+    //A simple way is to estimate the typical standard deviation from the images.
+    //I used here 10 as a reasonable value
+    // min and max can be used to further control the variance
+    float fCT;//CT - complexity reduction prior
+    //this is related to the number of samples needed to accept that a component
+    //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
+    //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
+
+    //shadow detection parameters
+    bool bShadowDetection;//default 1 - do shadow detection
+    unsigned char nShadowDetection;//do shadow detection - insert this value as the detection result - 127 default value
+    float fTau;
+    // Tau - shadow threshold. The shadow is detected if the pixel is darker
+    //version of the background. Tau is a threshold on how much darker the shadow can be.
+    //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
+    //See: Prati,Mikic,Trivedi,Cucchiara,"Detecting Moving Shadows...",IEEE PAMI,2003.
+
+    String name_;
+
+    template <typename T, int CN>
+    void getBackgroundImage_intern(OutputArray backgroundImage) const;
+
+#ifdef HAVE_OPENCL
+    bool ocl_getBackgroundImage(OutputArray backgroundImage) const;
+    bool ocl_apply(InputArray _image, OutputArray _fgmask, double learningRate=-1);
+    void create_ocl_apply_kernel();
+#endif
+};
+
+struct GaussBGStatModel2Params
+{
+    //image info
+    int nWidth;
+    int nHeight;
+    int nND;//number of data dimensions (image channels)
+
+    bool bPostFiltering;//default 1 - do postfiltering - will make shadow detection results also give value 255
+    double  minArea; // for postfiltering
+
+    bool bInit;//default 1, faster updates at start
+
+    /////////////////////////
+    //very important parameters - things you will change
+    ////////////////////////
+    float fAlphaT;
+    //alpha - speed of update - if the time interval you want to average over is T
+    //set alpha=1/T. It is also useful at start to make T slowly increase
+    //from 1 until the desired T
+    float fTb;
+    //Tb - threshold on the squared Mahalan. dist. to decide if it is well described
+    //by the background model or not. Related to Cthr from the paper.
+    //This does not influence the update of the background. A typical value could be 4 sigma
+    //and that is Tb=4*4=16;
+
+    /////////////////////////
+    //less important parameters - things you might change but be careful
+    ////////////////////////
+    float fTg;
+    //Tg - threshold on the squared Mahalan. dist. to decide
+    //when a sample is close to the existing components. If it is not close
+    //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
+    //Smaller Tg leads to more generated components and higher Tg might make
+    //lead to small number of components but they can grow too large
+    float fTB;//1-cf from the paper
+    //TB - threshold when the component becomes significant enough to be included into
+    //the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.
+    //For alpha=0.001 it means that the mode should exist for approximately 105 frames before
+    //it is considered foreground
+    float fVarInit;
+    float fVarMax;
+    float fVarMin;
+    //initial standard deviation  for the newly generated components.
+    //It will will influence the speed of adaptation. A good guess should be made.
+    //A simple way is to estimate the typical standard deviation from the images.
+    //I used here 10 as a reasonable value
+    float fCT;//CT - complexity reduction prior
+    //this is related to the number of samples needed to accept that a component
+    //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
+    //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
+
+    //even less important parameters
+    int nM;//max number of modes - const - 4 is usually enough
+
+    //shadow detection parameters
+    bool bShadowDetection;//default 1 - do shadow detection
+    unsigned char nShadowDetection;//do shadow detection - insert this value as the detection result
+    float fTau;
+    // Tau - shadow threshold. The shadow is detected if the pixel is darker
+    //version of the background. Tau is a threshold on how much darker the shadow can be.
+    //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
+    //See: Prati,Mikic,Trivedi,Cucchiara,"Detecting Moving Shadows...",IEEE PAMI,2003.
+};
+
+struct GMM
+{
+    float weight;
+    float variance;
+};
+
+// shadow detection performed per pixel
+// should work for rgb data, could be useful for gray scale and depth data as well
+// See: Prati,Mikic,Trivedi,Cucchiara,"Detecting Moving Shadows...",IEEE PAMI,2003.
+CV_INLINE bool
+detectShadowGMM(const float* data, int nchannels, int nmodes,
+                const GMM* gmm, const float* mean,
+                float Tb, float TB, float tau)
+{
+    float tWeight = 0;
+
+    // check all the components  marked as background:
+    for( int mode = 0; mode < nmodes; mode++, mean += nchannels )
+    {
+        GMM g = gmm[mode];
+
+        float numerator = 0.0f;
+        float denominator = 0.0f;
+        for( int c = 0; c < nchannels; c++ )
+        {
+            numerator   += data[c] * mean[c];
+            denominator += mean[c] * mean[c];
+        }
+
+        // no division by zero allowed
+        if( denominator == 0 )
+            return false;
+
+        // if tau < a < 1 then also check the color distortion
+        if( numerator <= denominator && numerator >= tau*denominator )
+        {
+            float a = numerator / denominator;
+            float dist2a = 0.0f;
+
+            for( int c = 0; c < nchannels; c++ )
+            {
+                float dD= a*mean[c] - data[c];
+                dist2a += dD*dD;
+            }
+
+            if (dist2a < Tb*g.variance*a*a)
+                return true;
+        };
+
+        tWeight += g.weight;
+        if( tWeight > TB )
+            return false;
+    };
+    return false;
+}
+
+//update GMM - the base update function performed per pixel
+//
+//"Efficient Adaptive Density Estimapion per Image Pixel for the Task of Background Subtraction"
+//Z.Zivkovic, F. van der Heijden
+//Pattern Recognition Letters, vol. 27, no. 7, pages 773-780, 2006.
+//
+//The algorithm similar to the standard Stauffer&Grimson algorithm with
+//additional selection of the number of the Gaussian components based on:
+//
+//"Recursive unsupervised learning of finite mixture models "
+//Z.Zivkovic, F.van der Heijden
+//IEEE Trans. on Pattern Analysis and Machine Intelligence, vol.26, no.5, pages 651-656, 2004
+//http://www.zoranz.net/Publications/zivkovic2004PAMI.pdf
+
+class MOG2Invoker : public ParallelLoopBody
+{
+public:
+    MOG2Invoker(const Mat& _src, Mat& _dst,
+                GMM* _gmm, float* _mean,
+                uchar* _modesUsed,
+                int _nmixtures, float _alphaT,
+                float _Tb, float _TB, float _Tg,
+                float _varInit, float _varMin, float _varMax,
+                float _prune, float _tau, bool _detectShadows,
+                uchar _shadowVal)
+    {
+        src = &_src;
+        dst = &_dst;
+        gmm0 = _gmm;
+        mean0 = _mean;
+        modesUsed0 = _modesUsed;
+        nmixtures = _nmixtures;
+        alphaT = _alphaT;
+        Tb = _Tb;
+        TB = _TB;
+        Tg = _Tg;
+        varInit = _varInit;
+        varMin = MIN(_varMin, _varMax);
+        varMax = MAX(_varMin, _varMax);
+        prune = _prune;
+        tau = _tau;
+        detectShadows = _detectShadows;
+        shadowVal = _shadowVal;
+    }
+
+    void operator()(const Range& range) const CV_OVERRIDE
+    {
+        int y0 = range.start, y1 = range.end;
+        int ncols = src->cols, nchannels = src->channels();
+        AutoBuffer<float> buf(src->cols*nchannels);
+        float alpha1 = 1.f - alphaT;
+        float dData[CV_CN_MAX];
+
+        for( int y = y0; y < y1; y++ )
+        {
+            const float* data = buf.data();
+            if( src->depth() != CV_32F )
+                src->row(y).convertTo(Mat(1, ncols, CV_32FC(nchannels), (void*)data), CV_32F);
+            else
+                data = src->ptr<float>(y);
+
+            float* mean = mean0 + ncols*nmixtures*nchannels*y;
+            GMM* gmm = gmm0 + ncols*nmixtures*y;
+            uchar* modesUsed = modesUsed0 + ncols*y;
+            uchar* mask = dst->ptr(y);
+
+            for( int x = 0; x < ncols; x++, data += nchannels, gmm += nmixtures, mean += nmixtures*nchannels )
+            {
+                //calculate distances to the modes (+ sort)
+                //here we need to go in descending order!!!
+                bool background = false;//return value -> true - the pixel classified as background
+
+                //internal:
+                bool fitsPDF = false;//if it remains zero a new GMM mode will be added
+                int nmodes = modesUsed[x];//current number of modes in GMM
+                float totalWeight = 0.f;
+
+                float* mean_m = mean;
+
+                //////
+                //go through all modes
+                for( int mode = 0; mode < nmodes; mode++, mean_m += nchannels )
+                {
+                    float weight = alpha1*gmm[mode].weight + prune;//need only weight if fit is found
+                    int swap_count = 0;
+                    ////
+                    //fit not found yet
+                    if( !fitsPDF )
+                    {
+                        //check if it belongs to some of the remaining modes
+                        float var = gmm[mode].variance;
+
+                        //calculate difference and distance
+                        float dist2;
+
+                        if( nchannels == 3 )
+                        {
+                            dData[0] = mean_m[0] - data[0];
+                            dData[1] = mean_m[1] - data[1];
+                            dData[2] = mean_m[2] - data[2];
+                            dist2 = dData[0]*dData[0] + dData[1]*dData[1] + dData[2]*dData[2];
+                        }
+                        else
+                        {
+                            dist2 = 0.f;
+                            for( int c = 0; c < nchannels; c++ )
+                            {
+                                dData[c] = mean_m[c] - data[c];
+                                dist2 += dData[c]*dData[c];
+                            }
+                        }
+
+                        //background? - Tb - usually larger than Tg
+                        if( totalWeight < TB && dist2 < Tb*var )
+                            background = true;
+
+                        //check fit
+                        if( dist2 < Tg*var )
+                        {
+                            /////
+                            //belongs to the mode
+                            fitsPDF = true;
+
+                            //update distribution
+
+                            //update weight
+                            weight += alphaT;
+                            float k = alphaT/weight;
+
+                            //update mean
+                            for( int c = 0; c < nchannels; c++ )
+                                mean_m[c] -= k*dData[c];
+
+                            //update variance
+                            float varnew = var + k*(dist2-var);
+                            //limit the variance
+                            varnew = MAX(varnew, varMin);
+                            varnew = MIN(varnew, varMax);
+                            gmm[mode].variance = varnew;
+
+                            //sort
+                            //all other weights are at the same place and
+                            //only the matched (iModes) is higher -> just find the new place for it
+                            for( int i = mode; i > 0; i-- )
+                            {
+                                //check one up
+                                if( weight < gmm[i-1].weight )
+                                    break;
+
+                                swap_count++;
+                                //swap one up
+                                std::swap(gmm[i], gmm[i-1]);
+                                for( int c = 0; c < nchannels; c++ )
+                                    std::swap(mean[i*nchannels + c], mean[(i-1)*nchannels + c]);
+                            }
+                            //belongs to the mode - bFitsPDF becomes 1
+                            /////
+                        }
+                    }//!bFitsPDF)
+
+                    //check prune
+                    if( weight < -prune )
+                    {
+                        weight = 0.0;
+                        nmodes--;
+                    }
+
+                    gmm[mode-swap_count].weight = weight;//update weight by the calculated value
+                    totalWeight += weight;
+                }
+                //go through all modes
+                //////
+
+                // Renormalize weights. In the special case that the pixel does
+                // not agree with any modes, set weights to zero (a new mode will be added below).
+                float invWeight = 0.f;
+                if (std::abs(totalWeight) > FLT_EPSILON) {
+                    invWeight = 1.f/totalWeight;
+                }
+
+                for( int mode = 0; mode < nmodes; mode++ )
+                {
+                    gmm[mode].weight *= invWeight;
+                }
+
+                //make new mode if needed and exit
+                if( !fitsPDF && alphaT > 0.f )
+                {
+                    // replace the weakest or add a new one
+                    int mode = nmodes == nmixtures ? nmixtures-1 : nmodes++;
+
+                    if (nmodes==1)
+                        gmm[mode].weight = 1.f;
+                    else
+                    {
+                        gmm[mode].weight = alphaT;
+
+                        // renormalize all other weights
+                        for( int i = 0; i < nmodes-1; i++ )
+                            gmm[i].weight *= alpha1;
+                    }
+
+                    // init
+                    for( int c = 0; c < nchannels; c++ )
+                        mean[mode*nchannels + c] = data[c];
+
+                    gmm[mode].variance = varInit;
+
+                    //sort
+                    //find the new place for it
+                    for( int i = nmodes - 1; i > 0; i-- )
+                    {
+                        // check one up
+                        if( alphaT < gmm[i-1].weight )
+                            break;
+
+                        // swap one up
+                        std::swap(gmm[i], gmm[i-1]);
+                        for( int c = 0; c < nchannels; c++ )
+                            std::swap(mean[i*nchannels + c], mean[(i-1)*nchannels + c]);
+                    }
+                }
+
+                //set the number of modes
+                modesUsed[x] = uchar(nmodes);
+                mask[x] = background ? 0 :
+                    detectShadows && detectShadowGMM(data, nchannels, nmodes, gmm, mean, Tb, TB, tau) ?
+                    shadowVal : 255;
+            }
+        }
+    }
+
+    const Mat* src;
+    Mat* dst;
+    GMM* gmm0;
+    float* mean0;
+    uchar* modesUsed0;
+
+    int nmixtures;
+    float alphaT, Tb, TB, Tg;
+    float varInit, varMin, varMax, prune, tau;
+
+    bool detectShadows;
+    uchar shadowVal;
+};
+
+#ifdef HAVE_OPENCL
+
+bool BackgroundSubtractorMOG2Impl::ocl_apply(InputArray _image, OutputArray _fgmask, double learningRate)
+{
+    bool needToInitialize = nframes == 0 || learningRate >= 1 || _image.size() != frameSize || _image.type() != frameType;
+
+    if( needToInitialize )
+        initialize(_image.size(), _image.type());
+
+    ++nframes;
+    learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./std::min( 2*nframes, history );
+    CV_Assert(learningRate >= 0);
+
+    _fgmask.create(_image.size(), CV_8U);
+    UMat fgmask = _fgmask.getUMat();
+
+    const double alpha1 = 1.0f - learningRate;
+
+    UMat frame = _image.getUMat();
+
+    float varMax = MAX(fVarMin, fVarMax);
+    float varMin = MIN(fVarMin, fVarMax);
+
+    int idxArg = 0;
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::ReadOnly(frame));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_bgmodelUsedModes));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_weight));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_mean));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_variance));
+    idxArg = kernel_apply.set(idxArg, ocl::KernelArg::WriteOnlyNoSize(fgmask));
+
+    idxArg = kernel_apply.set(idxArg, (float)learningRate);        //alphaT
+    idxArg = kernel_apply.set(idxArg, (float)alpha1);
+    idxArg = kernel_apply.set(idxArg, (float)(-learningRate*fCT));   //prune
+
+    idxArg = kernel_apply.set(idxArg, (float)varThreshold); //c_Tb
+    idxArg = kernel_apply.set(idxArg, backgroundRatio);     //c_TB
+    idxArg = kernel_apply.set(idxArg, varThresholdGen);     //c_Tg
+    idxArg = kernel_apply.set(idxArg, varMin);
+    idxArg = kernel_apply.set(idxArg, varMax);
+    idxArg = kernel_apply.set(idxArg, fVarInit);
+    idxArg = kernel_apply.set(idxArg, fTau);
+    if (bShadowDetection)
+        kernel_apply.set(idxArg, nShadowDetection);
+
+    size_t globalsize[] = {(size_t)frame.cols, (size_t)frame.rows, 1};
+    return kernel_apply.run(2, globalsize, NULL, true);
+}
+
+bool BackgroundSubtractorMOG2Impl::ocl_getBackgroundImage(OutputArray _backgroundImage) const
+{
+    _backgroundImage.create(frameSize, frameType);
+    UMat dst = _backgroundImage.getUMat();
+
+    int idxArg = 0;
+    idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_bgmodelUsedModes));
+    idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_weight));
+    idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_mean));
+    idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::WriteOnly(dst));
+    kernel_getBg.set(idxArg, backgroundRatio);
+
+    size_t globalsize[2] = {(size_t)u_bgmodelUsedModes.cols, (size_t)u_bgmodelUsedModes.rows};
+
+    return kernel_getBg.run(2, globalsize, NULL, false);
+}
+
+void BackgroundSubtractorMOG2Impl::create_ocl_apply_kernel()
+{
+    int nchannels = CV_MAT_CN(frameType);
+    bool isFloat = CV_MAKETYPE(CV_32F,nchannels) == frameType;
+    String opts = format("-D CN=%d -D FL=%d -D NMIXTURES=%d%s", nchannels, isFloat, nmixtures, bShadowDetection ? " -D SHADOW_DETECT" : "");
+    kernel_apply.create("mog2_kernel", ocl::video::bgfg_mog2_oclsrc, opts);
+}
+
+#endif
+
+void BackgroundSubtractorMOG2Impl::apply(InputArray _image, OutputArray _fgmask, double learningRate)
+{
+    CV_INSTRUMENT_REGION();
+
+#ifdef HAVE_OPENCL
+    if (opencl_ON)
+    {
+        CV_OCL_RUN(_fgmask.isUMat(), ocl_apply(_image, _fgmask, learningRate))
+
+        opencl_ON = false;
+        nframes = 0;
+    }
+#endif
+
+    bool needToInitialize = nframes == 0 || learningRate >= 1 || _image.size() != frameSize || _image.type() != frameType;
+
+    if( needToInitialize )
+        initialize(_image.size(), _image.type());
+
+    Mat image = _image.getMat();
+    _fgmask.create( image.size(), CV_8U );
+    Mat fgmask = _fgmask.getMat();
+
+    ++nframes;
+    learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./std::min( 2*nframes, history );
+    CV_Assert(learningRate >= 0);
+
+    parallel_for_(Range(0, image.rows),
+                  MOG2Invoker(image, fgmask,
+                              bgmodel.ptr<GMM>(),
+                              (float*)(bgmodel.ptr() + sizeof(GMM)*nmixtures*image.rows*image.cols),
+                              bgmodelUsedModes.ptr(), nmixtures, (float)learningRate,
+                              (float)varThreshold,
+                              backgroundRatio, varThresholdGen,
+                              fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau,
+                              bShadowDetection, nShadowDetection),
+                              image.total()/(double)(1 << 16));
+}
+
+template <typename T, int CN>
+void BackgroundSubtractorMOG2Impl::getBackgroundImage_intern(OutputArray backgroundImage) const
+{
+    CV_INSTRUMENT_REGION();
+
+    Mat meanBackground(frameSize, frameType, Scalar::all(0));
+    int firstGaussianIdx = 0;
+    const GMM* gmm = bgmodel.ptr<GMM>();
+    const float* mean = reinterpret_cast<const float*>(gmm + frameSize.width*frameSize.height*nmixtures);
+    Vec<float,CN> meanVal(0.f);
+    for(int row=0; row<meanBackground.rows; row++)
+    {
+        for(int col=0; col<meanBackground.cols; col++)
+        {
+            int nmodes = bgmodelUsedModes.at<uchar>(row, col);
+            float totalWeight = 0.f;
+            for(int gaussianIdx = firstGaussianIdx; gaussianIdx < firstGaussianIdx + nmodes; gaussianIdx++)
+            {
+                GMM gaussian = gmm[gaussianIdx];
+                size_t meanPosition = gaussianIdx*CN;
+                for(int chn = 0; chn < CN; chn++)
+                {
+                    meanVal(chn) += gaussian.weight * mean[meanPosition + chn];
+                }
+                totalWeight += gaussian.weight;
+
+                if(totalWeight > backgroundRatio)
+                    break;
+            }
+            float invWeight = 0.f;
+            if (std::abs(totalWeight) > FLT_EPSILON) {
+                invWeight = 1.f/totalWeight;
+            }
+
+            meanBackground.at<Vec<T,CN> >(row, col) = Vec<T,CN>(meanVal * invWeight);
+            meanVal = 0.f;
+
+            firstGaussianIdx += nmixtures;
+        }
+    }
+    meanBackground.copyTo(backgroundImage);
+}
+
+void BackgroundSubtractorMOG2Impl::getBackgroundImage(OutputArray backgroundImage) const
+{
+    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_32FC1 || frameType == CV_32FC3);
+
+#ifdef HAVE_OPENCL
+    if (opencl_ON)
+    {
+        CV_OCL_RUN(opencl_ON, ocl_getBackgroundImage(backgroundImage))
+
+        opencl_ON = false;
+    }
+#endif
+
+    switch(frameType)
+    {
+    case CV_8UC1:
+        getBackgroundImage_intern<uchar,1>(backgroundImage);
+        break;
+    case CV_8UC3:
+        getBackgroundImage_intern<uchar,3>(backgroundImage);
+        break;
+    case CV_32FC1:
+        getBackgroundImage_intern<float,1>(backgroundImage);
+        break;
+    case CV_32FC3:
+        getBackgroundImage_intern<float,3>(backgroundImage);
+        break;
+    }
+}
+
+Ptr<BackgroundSubtractorMOG2> createBackgroundSubtractorMOG2(int _history, double _varThreshold,
+                                                             bool _bShadowDetection)
+{
+    return makePtr<BackgroundSubtractorMOG2Impl>(_history, (float)_varThreshold, _bShadowDetection);
+}
+
+}
+
+/* End of file. */
--- a/Lib/opencv/sources/modules/video/src/camshift.cpp
+++ b/Lib/opencv/sources/modules/video/src/camshift.cpp
@@ -0,0 +1,220 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include "precomp.hpp"
+
+int cv::meanShift( InputArray _probImage, Rect& window, TermCriteria criteria )
+{
+    CV_INSTRUMENT_REGION();
+
+    Size size;
+    int cn;
+    Mat mat;
+    UMat umat;
+    bool isUMat = _probImage.isUMat();
+
+    if (isUMat)
+        umat = _probImage.getUMat(), cn = umat.channels(), size = umat.size();
+    else
+        mat = _probImage.getMat(), cn = mat.channels(), size = mat.size();
+
+    Rect cur_rect = window;
+
+    CV_Assert( cn == 1 );
+
+    if( window.height <= 0 || window.width <= 0 )
+        CV_Error( Error::StsBadArg, "Input window has non-positive sizes" );
+
+    window = window & Rect(0, 0, size.width, size.height);
+
+    double eps = (criteria.type & TermCriteria::EPS) ? std::max(criteria.epsilon, 0.) : 1.;
+    eps = cvRound(eps*eps);
+    int i, niters = (criteria.type & TermCriteria::MAX_ITER) ? std::max(criteria.maxCount, 1) : 100;
+
+    for( i = 0; i < niters; i++ )
+    {
+        cur_rect = cur_rect & Rect(0, 0, size.width, size.height);
+        if( cur_rect == Rect() )
+        {
+            cur_rect.x = size.width/2;
+            cur_rect.y = size.height/2;
+        }
+        cur_rect.width = std::max(cur_rect.width, 1);
+        cur_rect.height = std::max(cur_rect.height, 1);
+
+        Moments m = isUMat ? moments(umat(cur_rect)) : moments(mat(cur_rect));
+
+        // Calculating center of mass
+        if( fabs(m.m00) < DBL_EPSILON )
+            break;
+
+        int dx = cvRound( m.m10/m.m00 - window.width*0.5 );
+        int dy = cvRound( m.m01/m.m00 - window.height*0.5 );
+
+        int nx = std::min(std::max(cur_rect.x + dx, 0), size.width - cur_rect.width);
+        int ny = std::min(std::max(cur_rect.y + dy, 0), size.height - cur_rect.height);
+
+        dx = nx - cur_rect.x;
+        dy = ny - cur_rect.y;
+        cur_rect.x = nx;
+        cur_rect.y = ny;
+
+        // Check for coverage centers mass & window
+        if( dx*dx + dy*dy < eps )
+            break;
+    }
+
+    window = cur_rect;
+    return i;
+}
+
+
+cv::RotatedRect cv::CamShift( InputArray _probImage, Rect& window,
+                              TermCriteria criteria )
+{
+    CV_INSTRUMENT_REGION();
+
+    const int TOLERANCE = 10;
+    Size size;
+    Mat mat;
+    UMat umat;
+    bool isUMat = _probImage.isUMat();
+
+    if (isUMat)
+        umat = _probImage.getUMat(), size = umat.size();
+    else
+        mat = _probImage.getMat(), size = mat.size();
+
+    meanShift( _probImage, window, criteria );
+
+    window.x -= TOLERANCE;
+    if( window.x < 0 )
+        window.x = 0;
+
+    window.y -= TOLERANCE;
+    if( window.y < 0 )
+        window.y = 0;
+
+    window.width += 2 * TOLERANCE;
+    if( window.x + window.width > size.width )
+        window.width = size.width - window.x;
+
+    window.height += 2 * TOLERANCE;
+    if( window.y + window.height > size.height )
+        window.height = size.height - window.y;
+
+    // Calculating moments in new center mass
+    Moments m = isUMat ? moments(umat(window)) : moments(mat(window));
+
+    double m00 = m.m00, m10 = m.m10, m01 = m.m01;
+    double mu11 = m.mu11, mu20 = m.mu20, mu02 = m.mu02;
+
+    if( fabs(m00) < DBL_EPSILON )
+        return RotatedRect();
+
+    double inv_m00 = 1. / m00;
+    int xc = cvRound( m10 * inv_m00 + window.x );
+    int yc = cvRound( m01 * inv_m00 + window.y );
+    double a = mu20 * inv_m00, b = mu11 * inv_m00, c = mu02 * inv_m00;
+
+    // Calculating width & height
+    double square = std::sqrt( 4 * b * b + (a - c) * (a - c) );
+
+    // Calculating orientation
+    double theta = atan2( 2 * b, a - c + square );
+
+    // Calculating width & length of figure
+    double cs = cos( theta );
+    double sn = sin( theta );
+
+    double rotate_a = cs * cs * mu20 + 2 * cs * sn * mu11 + sn * sn * mu02;
+    double rotate_c = sn * sn * mu20 - 2 * cs * sn * mu11 + cs * cs * mu02;
+    rotate_a = std::max(0.0, rotate_a);  // avoid negative result due calculation numeric errors
+    rotate_c = std::max(0.0, rotate_c);  // avoid negative result due calculation numeric errors
+    double length = std::sqrt( rotate_a * inv_m00 ) * 4;
+    double width = std::sqrt( rotate_c * inv_m00 ) * 4;
+
+    // In case, when tetta is 0 or 1.57... the Length & Width may be exchanged
+    if( length < width )
+    {
+        std::swap( length, width );
+        std::swap( cs, sn );
+        theta = CV_PI*0.5 - theta;
+    }
+
+    // Saving results
+    int _xc = cvRound( xc );
+    int _yc = cvRound( yc );
+
+    int t0 = cvRound( fabs( length * cs ));
+    int t1 = cvRound( fabs( width * sn ));
+
+    t0 = MAX( t0, t1 ) + 2;
+    window.width = MIN( t0, (size.width - _xc) * 2 );
+
+    t0 = cvRound( fabs( length * sn ));
+    t1 = cvRound( fabs( width * cs ));
+
+    t0 = MAX( t0, t1 ) + 2;
+    window.height = MIN( t0, (size.height - _yc) * 2 );
+
+    window.x = MAX( 0, _xc - window.width / 2 );
+    window.y = MAX( 0, _yc - window.height / 2 );
+
+    window.width = MIN( size.width - window.x, window.width );
+    window.height = MIN( size.height - window.y, window.height );
+
+    RotatedRect box;
+    box.size.height = (float)length;
+    box.size.width = (float)width;
+    box.angle = (float)((CV_PI*0.5+theta)*180./CV_PI);
+    while(box.angle < 0)
+        box.angle += 360;
+    while(box.angle >= 360)
+        box.angle -= 360;
+    if(box.angle >= 180)
+        box.angle -= 180;
+    box.center = Point2f( window.x + window.width*0.5f, window.y + window.height*0.5f);
+
+    return box;
+}
+
+/* End of file. */
--- a/Lib/opencv/sources/modules/video/src/dis_flow.cpp
+++ b/Lib/opencv/sources/modules/video/src/dis_flow.cpp
--- a/Lib/opencv/sources/modules/video/src/ecc.cpp
+++ b/Lib/opencv/sources/modules/video/src/ecc.cpp
@@ -0,0 +1,601 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+
+/****************************************************************************************\
+*                                       Image Alignment (ECC algorithm)                  *
+\****************************************************************************************/
+
+using namespace cv;
+
+static void image_jacobian_homo_ECC(const Mat& src1, const Mat& src2,
+                                    const Mat& src3, const Mat& src4,
+                                    const Mat& src5, Mat& dst)
+{
+
+
+    CV_Assert(src1.size() == src2.size());
+    CV_Assert(src1.size() == src3.size());
+    CV_Assert(src1.size() == src4.size());
+
+    CV_Assert( src1.rows == dst.rows);
+    CV_Assert(dst.cols == (src1.cols*8));
+    CV_Assert(dst.type() == CV_32FC1);
+
+    CV_Assert(src5.isContinuous());
+
+
+    const float* hptr = src5.ptr<float>(0);
+
+    const float h0_ = hptr[0];
+    const float h1_ = hptr[3];
+    const float h2_ = hptr[6];
+    const float h3_ = hptr[1];
+    const float h4_ = hptr[4];
+    const float h5_ = hptr[7];
+    const float h6_ = hptr[2];
+    const float h7_ = hptr[5];
+
+    const int w = src1.cols;
+
+
+    //create denominator for all points as a block
+    Mat den_ = src3*h2_ + src4*h5_ + 1.0;//check the time of this! otherwise use addWeighted
+
+    //create projected points
+    Mat hatX_ = -src3*h0_ - src4*h3_ - h6_;
+    divide(hatX_, den_, hatX_);
+    Mat hatY_ = -src3*h1_ - src4*h4_ - h7_;
+    divide(hatY_, den_, hatY_);
+
+
+    //instead of dividing each block with den,
+    //just pre-divide the block of gradients (it's more efficient)
+
+    Mat src1Divided_;
+    Mat src2Divided_;
+
+    divide(src1, den_, src1Divided_);
+    divide(src2, den_, src2Divided_);
+
+
+    //compute Jacobian blocks (8 blocks)
+
+    dst.colRange(0, w) = src1Divided_.mul(src3);//1
+
+    dst.colRange(w,2*w) = src2Divided_.mul(src3);//2
+
+    Mat temp_ = (hatX_.mul(src1Divided_)+hatY_.mul(src2Divided_));
+    dst.colRange(2*w,3*w) = temp_.mul(src3);//3
+
+    hatX_.release();
+    hatY_.release();
+
+    dst.colRange(3*w, 4*w) = src1Divided_.mul(src4);//4
+
+    dst.colRange(4*w, 5*w) = src2Divided_.mul(src4);//5
+
+    dst.colRange(5*w, 6*w) = temp_.mul(src4);//6
+
+    src1Divided_.copyTo(dst.colRange(6*w, 7*w));//7
+
+    src2Divided_.copyTo(dst.colRange(7*w, 8*w));//8
+}
+
+static void image_jacobian_euclidean_ECC(const Mat& src1, const Mat& src2,
+                                         const Mat& src3, const Mat& src4,
+                                         const Mat& src5, Mat& dst)
+{
+
+    CV_Assert( src1.size()==src2.size());
+    CV_Assert( src1.size()==src3.size());
+    CV_Assert( src1.size()==src4.size());
+
+    CV_Assert( src1.rows == dst.rows);
+    CV_Assert(dst.cols == (src1.cols*3));
+    CV_Assert(dst.type() == CV_32FC1);
+
+    CV_Assert(src5.isContinuous());
+
+    const float* hptr = src5.ptr<float>(0);
+
+    const float h0 = hptr[0];//cos(theta)
+    const float h1 = hptr[3];//sin(theta)
+
+    const int w = src1.cols;
+
+    //create -sin(theta)*X -cos(theta)*Y for all points as a block -> hatX
+    Mat hatX = -(src3*h1) - (src4*h0);
+
+    //create cos(theta)*X -sin(theta)*Y for all points as a block -> hatY
+    Mat hatY = (src3*h0) - (src4*h1);
+
+
+    //compute Jacobian blocks (3 blocks)
+    dst.colRange(0, w) = (src1.mul(hatX))+(src2.mul(hatY));//1
+
+    src1.copyTo(dst.colRange(w, 2*w));//2
+    src2.copyTo(dst.colRange(2*w, 3*w));//3
+}
+
+
+static void image_jacobian_affine_ECC(const Mat& src1, const Mat& src2,
+                                      const Mat& src3, const Mat& src4,
+                                      Mat& dst)
+{
+
+    CV_Assert(src1.size() == src2.size());
+    CV_Assert(src1.size() == src3.size());
+    CV_Assert(src1.size() == src4.size());
+
+    CV_Assert(src1.rows == dst.rows);
+    CV_Assert(dst.cols == (6*src1.cols));
+
+    CV_Assert(dst.type() == CV_32FC1);
+
+
+    const int w = src1.cols;
+
+    //compute Jacobian blocks (6 blocks)
+
+    dst.colRange(0,w) = src1.mul(src3);//1
+    dst.colRange(w,2*w) = src2.mul(src3);//2
+    dst.colRange(2*w,3*w) = src1.mul(src4);//3
+    dst.colRange(3*w,4*w) = src2.mul(src4);//4
+    src1.copyTo(dst.colRange(4*w,5*w));//5
+    src2.copyTo(dst.colRange(5*w,6*w));//6
+}
+
+
+static void image_jacobian_translation_ECC(const Mat& src1, const Mat& src2, Mat& dst)
+{
+
+    CV_Assert( src1.size()==src2.size());
+
+    CV_Assert( src1.rows == dst.rows);
+    CV_Assert(dst.cols == (src1.cols*2));
+    CV_Assert(dst.type() == CV_32FC1);
+
+    const int w = src1.cols;
+
+    //compute Jacobian blocks (2 blocks)
+    src1.copyTo(dst.colRange(0, w));
+    src2.copyTo(dst.colRange(w, 2*w));
+}
+
+
+static void project_onto_jacobian_ECC(const Mat& src1, const Mat& src2, Mat& dst)
+{
+    /* this functions is used for two types of projections. If src1.cols ==src.cols
+    it does a blockwise multiplication (like in the outer product of vectors)
+    of the blocks in matrices src1 and src2 and dst
+    has size (number_of_blcks x number_of_blocks), otherwise dst is a vector of size
+    (number_of_blocks x 1) since src2 is "multiplied"(dot) with each block of src1.
+
+    The number_of_blocks is equal to the number of parameters we are lloking for
+    (i.e. rtanslation:2, euclidean: 3, affine: 6, homography: 8)
+
+    */
+    CV_Assert(src1.rows == src2.rows);
+    CV_Assert((src1.cols % src2.cols) == 0);
+    int w;
+
+    float* dstPtr = dst.ptr<float>(0);
+
+    if (src1.cols !=src2.cols){//dst.cols==1
+        w  = src2.cols;
+        for (int i=0; i<dst.rows; i++){
+            dstPtr[i] = (float) src2.dot(src1.colRange(i*w,(i+1)*w));
+        }
+    }
+
+    else {
+        CV_Assert(dst.cols == dst.rows); //dst is square (and symmetric)
+        w = src2.cols/dst.cols;
+        Mat mat;
+        for (int i=0; i<dst.rows; i++){
+
+            mat = Mat(src1.colRange(i*w, (i+1)*w));
+            dstPtr[i*(dst.rows+1)] = (float) pow(norm(mat),2); //diagonal elements
+
+            for (int j=i+1; j<dst.cols; j++){ //j starts from i+1
+                dstPtr[i*dst.cols+j] = (float) mat.dot(src2.colRange(j*w, (j+1)*w));
+                dstPtr[j*dst.cols+i] = dstPtr[i*dst.cols+j]; //due to symmetry
+            }
+        }
+    }
+}
+
+
+static void update_warping_matrix_ECC (Mat& map_matrix, const Mat& update, const int motionType)
+{
+    CV_Assert (map_matrix.type() == CV_32FC1);
+    CV_Assert (update.type() == CV_32FC1);
+
+    CV_Assert (motionType == MOTION_TRANSLATION || motionType == MOTION_EUCLIDEAN ||
+        motionType == MOTION_AFFINE || motionType == MOTION_HOMOGRAPHY);
+
+    if (motionType == MOTION_HOMOGRAPHY)
+        CV_Assert (map_matrix.rows == 3 && update.rows == 8);
+    else if (motionType == MOTION_AFFINE)
+        CV_Assert(map_matrix.rows == 2 && update.rows == 6);
+    else if (motionType == MOTION_EUCLIDEAN)
+        CV_Assert (map_matrix.rows == 2 && update.rows == 3);
+    else
+        CV_Assert (map_matrix.rows == 2 && update.rows == 2);
+
+    CV_Assert (update.cols == 1);
+
+    CV_Assert( map_matrix.isContinuous());
+    CV_Assert( update.isContinuous() );
+
+
+    float* mapPtr = map_matrix.ptr<float>(0);
+    const float* updatePtr = update.ptr<float>(0);
+
+
+    if (motionType == MOTION_TRANSLATION){
+        mapPtr[2] += updatePtr[0];
+        mapPtr[5] += updatePtr[1];
+    }
+    if (motionType == MOTION_AFFINE) {
+        mapPtr[0] += updatePtr[0];
+        mapPtr[3] += updatePtr[1];
+        mapPtr[1] += updatePtr[2];
+        mapPtr[4] += updatePtr[3];
+        mapPtr[2] += updatePtr[4];
+        mapPtr[5] += updatePtr[5];
+    }
+    if (motionType == MOTION_HOMOGRAPHY) {
+        mapPtr[0] += updatePtr[0];
+        mapPtr[3] += updatePtr[1];
+        mapPtr[6] += updatePtr[2];
+        mapPtr[1] += updatePtr[3];
+        mapPtr[4] += updatePtr[4];
+        mapPtr[7] += updatePtr[5];
+        mapPtr[2] += updatePtr[6];
+        mapPtr[5] += updatePtr[7];
+    }
+    if (motionType == MOTION_EUCLIDEAN) {
+        double new_theta = updatePtr[0];
+        new_theta += asin(mapPtr[3]);
+
+        mapPtr[2] += updatePtr[1];
+        mapPtr[5] += updatePtr[2];
+        mapPtr[0] = mapPtr[4] = (float) cos(new_theta);
+        mapPtr[3] = (float) sin(new_theta);
+        mapPtr[1] = -mapPtr[3];
+    }
+}
+
+
+/** Function that computes enhanced corelation coefficient from Georgios et.al. 2008
+*   See https://github.com/opencv/opencv/issues/12432
+*/
+double cv::computeECC(InputArray templateImage, InputArray inputImage, InputArray inputMask)
+{
+    CV_Assert(!templateImage.empty());
+    CV_Assert(!inputImage.empty());
+
+    if( ! (templateImage.type()==inputImage.type()))
+        CV_Error( Error::StsUnmatchedFormats, "Both input images must have the same data type" );
+
+    Scalar meanTemplate, sdTemplate;
+
+    int active_pixels = inputMask.empty() ? templateImage.size().area() : countNonZero(inputMask);
+
+    meanStdDev(templateImage, meanTemplate, sdTemplate, inputMask);
+    Mat templateImage_zeromean = Mat::zeros(templateImage.size(), templateImage.type());
+    subtract(templateImage, meanTemplate, templateImage_zeromean, inputMask);
+    double templateImagenorm = std::sqrt(active_pixels*sdTemplate.val[0]*sdTemplate.val[0]);
+
+    Scalar meanInput, sdInput;
+
+    Mat inputImage_zeromean = Mat::zeros(inputImage.size(), inputImage.type());
+    meanStdDev(inputImage, meanInput, sdInput, inputMask);
+    subtract(inputImage, meanInput, inputImage_zeromean, inputMask);
+    double inputImagenorm = std::sqrt(active_pixels*sdInput.val[0]*sdInput.val[0]);
+
+    return templateImage_zeromean.dot(inputImage_zeromean)/(templateImagenorm*inputImagenorm);
+}
+
+
+double cv::findTransformECC(InputArray templateImage,
+                            InputArray inputImage,
+                            InputOutputArray warpMatrix,
+                            int motionType,
+                            TermCriteria criteria,
+                            InputArray inputMask,
+                            int gaussFiltSize)
+{
+
+
+    Mat src = templateImage.getMat();//template image
+    Mat dst = inputImage.getMat(); //input image (to be warped)
+    Mat map = warpMatrix.getMat(); //warp (transformation)
+
+    CV_Assert(!src.empty());
+    CV_Assert(!dst.empty());
+
+    // If the user passed an un-initialized warpMatrix, initialize to identity
+    if(map.empty()) {
+        int rowCount = 2;
+        if(motionType == MOTION_HOMOGRAPHY)
+            rowCount = 3;
+
+        warpMatrix.create(rowCount, 3, CV_32FC1);
+        map = warpMatrix.getMat();
+        map = Mat::eye(rowCount, 3, CV_32F);
+    }
+
+    if( ! (src.type()==dst.type()))
+        CV_Error( Error::StsUnmatchedFormats, "Both input images must have the same data type" );
+
+    //accept only 1-channel images
+    if( src.type() != CV_8UC1 && src.type()!= CV_32FC1)
+        CV_Error( Error::StsUnsupportedFormat, "Images must have 8uC1 or 32fC1 type");
+
+    if( map.type() != CV_32FC1)
+        CV_Error( Error::StsUnsupportedFormat, "warpMatrix must be single-channel floating-point matrix");
+
+    CV_Assert (map.cols == 3);
+    CV_Assert (map.rows == 2 || map.rows ==3);
+
+    CV_Assert (motionType == MOTION_AFFINE || motionType == MOTION_HOMOGRAPHY ||
+        motionType == MOTION_EUCLIDEAN || motionType == MOTION_TRANSLATION);
+
+    if (motionType == MOTION_HOMOGRAPHY){
+        CV_Assert (map.rows ==3);
+    }
+
+    CV_Assert (criteria.type & TermCriteria::COUNT || criteria.type & TermCriteria::EPS);
+    const int    numberOfIterations = (criteria.type & TermCriteria::COUNT) ? criteria.maxCount : 200;
+    const double termination_eps    = (criteria.type & TermCriteria::EPS)   ? criteria.epsilon  :  -1;
+
+    int paramTemp = 6;//default: affine
+    switch (motionType){
+      case MOTION_TRANSLATION:
+          paramTemp = 2;
+          break;
+      case MOTION_EUCLIDEAN:
+          paramTemp = 3;
+          break;
+      case MOTION_HOMOGRAPHY:
+          paramTemp = 8;
+          break;
+    }
+
+
+    const int numberOfParameters = paramTemp;
+
+    const int ws = src.cols;
+    const int hs = src.rows;
+    const int wd = dst.cols;
+    const int hd = dst.rows;
+
+    Mat Xcoord = Mat(1, ws, CV_32F);
+    Mat Ycoord = Mat(hs, 1, CV_32F);
+    Mat Xgrid = Mat(hs, ws, CV_32F);
+    Mat Ygrid = Mat(hs, ws, CV_32F);
+
+    float* XcoPtr = Xcoord.ptr<float>(0);
+    float* YcoPtr = Ycoord.ptr<float>(0);
+    int j;
+    for (j=0; j<ws; j++)
+        XcoPtr[j] = (float) j;
+    for (j=0; j<hs; j++)
+        YcoPtr[j] = (float) j;
+
+    repeat(Xcoord, hs, 1, Xgrid);
+    repeat(Ycoord, 1, ws, Ygrid);
+
+    Xcoord.release();
+    Ycoord.release();
+
+    Mat templateZM    = Mat(hs, ws, CV_32F);// to store the (smoothed)zero-mean version of template
+    Mat templateFloat = Mat(hs, ws, CV_32F);// to store the (smoothed) template
+    Mat imageFloat    = Mat(hd, wd, CV_32F);// to store the (smoothed) input image
+    Mat imageWarped   = Mat(hs, ws, CV_32F);// to store the warped zero-mean input image
+    Mat imageMask     = Mat(hs, ws, CV_8U); // to store the final mask
+
+    Mat inputMaskMat = inputMask.getMat();
+    //to use it for mask warping
+    Mat preMask;
+    if(inputMask.empty())
+        preMask = Mat::ones(hd, wd, CV_8U);
+    else
+        threshold(inputMask, preMask, 0, 1, THRESH_BINARY);
+
+    //gaussian filtering is optional
+    src.convertTo(templateFloat, templateFloat.type());
+    GaussianBlur(templateFloat, templateFloat, Size(gaussFiltSize, gaussFiltSize), 0, 0);
+
+    Mat preMaskFloat;
+    preMask.convertTo(preMaskFloat, CV_32F);
+    GaussianBlur(preMaskFloat, preMaskFloat, Size(gaussFiltSize, gaussFiltSize), 0, 0);
+    // Change threshold.
+    preMaskFloat *= (0.5/0.95);
+    // Rounding conversion.
+    preMaskFloat.convertTo(preMask, preMask.type());
+    preMask.convertTo(preMaskFloat, preMaskFloat.type());
+
+    dst.convertTo(imageFloat, imageFloat.type());
+    GaussianBlur(imageFloat, imageFloat, Size(gaussFiltSize, gaussFiltSize), 0, 0);
+
+    // needed matrices for gradients and warped gradients
+    Mat gradientX = Mat::zeros(hd, wd, CV_32FC1);
+    Mat gradientY = Mat::zeros(hd, wd, CV_32FC1);
+    Mat gradientXWarped = Mat(hs, ws, CV_32FC1);
+    Mat gradientYWarped = Mat(hs, ws, CV_32FC1);
+
+
+    // calculate first order image derivatives
+    Matx13f dx(-0.5f, 0.0f, 0.5f);
+
+    filter2D(imageFloat, gradientX, -1, dx);
+    filter2D(imageFloat, gradientY, -1, dx.t());
+
+    gradientX = gradientX.mul(preMaskFloat);
+    gradientY = gradientY.mul(preMaskFloat);
+
+    // matrices needed for solving linear equation system for maximizing ECC
+    Mat jacobian                = Mat(hs, ws*numberOfParameters, CV_32F);
+    Mat hessian                 = Mat(numberOfParameters, numberOfParameters, CV_32F);
+    Mat hessianInv              = Mat(numberOfParameters, numberOfParameters, CV_32F);
+    Mat imageProjection         = Mat(numberOfParameters, 1, CV_32F);
+    Mat templateProjection      = Mat(numberOfParameters, 1, CV_32F);
+    Mat imageProjectionHessian  = Mat(numberOfParameters, 1, CV_32F);
+    Mat errorProjection         = Mat(numberOfParameters, 1, CV_32F);
+
+    Mat deltaP = Mat(numberOfParameters, 1, CV_32F);//transformation parameter correction
+    Mat error = Mat(hs, ws, CV_32F);//error as 2D matrix
+
+    const int imageFlags = INTER_LINEAR  + WARP_INVERSE_MAP;
+    const int maskFlags  = INTER_NEAREST + WARP_INVERSE_MAP;
+
+
+    // iteratively update map_matrix
+    double rho      = -1;
+    double last_rho = - termination_eps;
+    for (int i = 1; (i <= numberOfIterations) && (fabs(rho-last_rho)>= termination_eps); i++)
+    {
+
+        // warp-back portion of the inputImage and gradients to the coordinate space of the templateImage
+        if (motionType != MOTION_HOMOGRAPHY)
+        {
+            warpAffine(imageFloat, imageWarped,     map, imageWarped.size(),     imageFlags);
+            warpAffine(gradientX,  gradientXWarped, map, gradientXWarped.size(), imageFlags);
+            warpAffine(gradientY,  gradientYWarped, map, gradientYWarped.size(), imageFlags);
+            warpAffine(preMask,    imageMask,       map, imageMask.size(),       maskFlags);
+        }
+        else
+        {
+            warpPerspective(imageFloat, imageWarped,     map, imageWarped.size(),     imageFlags);
+            warpPerspective(gradientX,  gradientXWarped, map, gradientXWarped.size(), imageFlags);
+            warpPerspective(gradientY,  gradientYWarped, map, gradientYWarped.size(), imageFlags);
+            warpPerspective(preMask,    imageMask,       map, imageMask.size(),       maskFlags);
+        }
+
+        Scalar imgMean, imgStd, tmpMean, tmpStd;
+        meanStdDev(imageWarped,   imgMean, imgStd, imageMask);
+        meanStdDev(templateFloat, tmpMean, tmpStd, imageMask);
+
+        subtract(imageWarped,   imgMean, imageWarped, imageMask);//zero-mean input
+        templateZM = Mat::zeros(templateZM.rows, templateZM.cols, templateZM.type());
+        subtract(templateFloat, tmpMean, templateZM,  imageMask);//zero-mean template
+
+        const double tmpNorm = std::sqrt(countNonZero(imageMask)*(tmpStd.val[0])*(tmpStd.val[0]));
+        const double imgNorm = std::sqrt(countNonZero(imageMask)*(imgStd.val[0])*(imgStd.val[0]));
+
+        // calculate jacobian of image wrt parameters
+        switch (motionType){
+            case MOTION_AFFINE:
+                image_jacobian_affine_ECC(gradientXWarped, gradientYWarped, Xgrid, Ygrid, jacobian);
+                break;
+            case MOTION_HOMOGRAPHY:
+                image_jacobian_homo_ECC(gradientXWarped, gradientYWarped, Xgrid, Ygrid, map, jacobian);
+                break;
+            case MOTION_TRANSLATION:
+                image_jacobian_translation_ECC(gradientXWarped, gradientYWarped, jacobian);
+                break;
+            case MOTION_EUCLIDEAN:
+                image_jacobian_euclidean_ECC(gradientXWarped, gradientYWarped, Xgrid, Ygrid, map, jacobian);
+                break;
+        }
+
+        // calculate Hessian and its inverse
+        project_onto_jacobian_ECC(jacobian, jacobian, hessian);
+
+        hessianInv = hessian.inv();
+
+        const double correlation = templateZM.dot(imageWarped);
+
+        // calculate enhanced correlation coefficient (ECC)->rho
+        last_rho = rho;
+        rho = correlation/(imgNorm*tmpNorm);
+        if (cvIsNaN(rho)) {
+          CV_Error(Error::StsNoConv, "NaN encountered.");
+        }
+
+        // project images into jacobian
+        project_onto_jacobian_ECC( jacobian, imageWarped, imageProjection);
+        project_onto_jacobian_ECC(jacobian, templateZM, templateProjection);
+
+
+        // calculate the parameter lambda to account for illumination variation
+        imageProjectionHessian = hessianInv*imageProjection;
+        const double lambda_n = (imgNorm*imgNorm) - imageProjection.dot(imageProjectionHessian);
+        const double lambda_d = correlation - templateProjection.dot(imageProjectionHessian);
+        if (lambda_d <= 0.0)
+        {
+            rho = -1;
+            CV_Error(Error::StsNoConv, "The algorithm stopped before its convergence. The correlation is going to be minimized. Images may be uncorrelated or non-overlapped");
+
+        }
+        const double lambda = (lambda_n/lambda_d);
+
+        // estimate the update step delta_p
+        error = lambda*templateZM - imageWarped;
+        project_onto_jacobian_ECC(jacobian, error, errorProjection);
+        deltaP = hessianInv * errorProjection;
+
+        // update warping matrix
+        update_warping_matrix_ECC( map, deltaP, motionType);
+
+
+    }
+
+    // return final correlation coefficient
+    return rho;
+}
+
+double cv::findTransformECC(InputArray templateImage, InputArray inputImage,
+    InputOutputArray warpMatrix, int motionType,
+    TermCriteria criteria,
+    InputArray inputMask)
+{
+    // Use default value of 5 for gaussFiltSize to maintain backward compatibility.
+    return findTransformECC(templateImage, inputImage, warpMatrix, motionType, criteria, inputMask, 5);
+}
+
+/* End of file. */
--- a/Lib/opencv/sources/modules/video/src/kalman.cpp
+++ b/Lib/opencv/sources/modules/video/src/kalman.cpp
@@ -0,0 +1,134 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include "precomp.hpp"
+
+namespace cv
+{
+
+KalmanFilter::KalmanFilter() {}
+KalmanFilter::KalmanFilter(int dynamParams, int measureParams, int controlParams, int type)
+{
+    init(dynamParams, measureParams, controlParams, type);
+}
+
+void KalmanFilter::init(int DP, int MP, int CP, int type)
+{
+    CV_Assert( DP > 0 && MP > 0 );
+    CV_Assert( type == CV_32F || type == CV_64F );
+    CP = std::max(CP, 0);
+
+    statePre = Mat::zeros(DP, 1, type);
+    statePost = Mat::zeros(DP, 1, type);
+    transitionMatrix = Mat::eye(DP, DP, type);
+
+    processNoiseCov = Mat::eye(DP, DP, type);
+    measurementMatrix = Mat::zeros(MP, DP, type);
+    measurementNoiseCov = Mat::eye(MP, MP, type);
+
+    errorCovPre = Mat::zeros(DP, DP, type);
+    errorCovPost = Mat::zeros(DP, DP, type);
+    gain = Mat::zeros(DP, MP, type);
+
+    if( CP > 0 )
+        controlMatrix = Mat::zeros(DP, CP, type);
+    else
+        controlMatrix.release();
+
+    temp1.create(DP, DP, type);
+    temp2.create(MP, DP, type);
+    temp3.create(MP, MP, type);
+    temp4.create(MP, DP, type);
+    temp5.create(MP, 1, type);
+}
+
+const Mat& KalmanFilter::predict(const Mat& control)
+{
+    CV_INSTRUMENT_REGION();
+
+    // update the state: x'(k) = A*x(k)
+    statePre = transitionMatrix*statePost;
+
+    if( !control.empty() )
+        // x'(k) = x'(k) + B*u(k)
+        statePre += controlMatrix*control;
+
+    // update error covariance matrices: temp1 = A*P(k)
+    temp1 = transitionMatrix*errorCovPost;
+
+    // P'(k) = temp1*At + Q
+    gemm(temp1, transitionMatrix, 1, processNoiseCov, 1, errorCovPre, GEMM_2_T);
+
+    // handle the case when there will be measurement before the next predict.
+    statePre.copyTo(statePost);
+    errorCovPre.copyTo(errorCovPost);
+
+    return statePre;
+}
+
+const Mat& KalmanFilter::correct(const Mat& measurement)
+{
+    CV_INSTRUMENT_REGION();
+
+    // temp2 = H*P'(k)
+    temp2 = measurementMatrix * errorCovPre;
+
+    // temp3 = temp2*Ht + R
+    gemm(temp2, measurementMatrix, 1, measurementNoiseCov, 1, temp3, GEMM_2_T);
+
+    // temp4 = inv(temp3)*temp2 = Kt(k)
+    solve(temp3, temp2, temp4, DECOMP_SVD);
+
+    // K(k)
+    gain = temp4.t();
+
+    // temp5 = z(k) - H*x'(k)
+    temp5 = measurement - measurementMatrix*statePre;
+
+    // x(k) = x'(k) + K(k)*temp5
+    statePost = statePre + gain*temp5;
+
+    // P(k) = P'(k) - K(k)*temp2
+    errorCovPost = errorCovPre - gain*temp2;
+
+    return statePost;
+}
+
+}
--- a/Lib/opencv/sources/modules/video/src/lkpyramid.cpp
+++ b/Lib/opencv/sources/modules/video/src/lkpyramid.cpp
--- a/Lib/opencv/sources/modules/video/src/lkpyramid.hpp
+++ b/Lib/opencv/sources/modules/video/src/lkpyramid.hpp
@@ -0,0 +1,48 @@
+#pragma once
+
+namespace cv
+{
+namespace detail
+{
+
+    typedef short deriv_type;
+
+    struct SharrDerivInvoker : ParallelLoopBody
+    {
+        SharrDerivInvoker(const Mat& _src, const Mat& _dst)
+            : src(_src), dst(_dst)
+        { }
+
+        void operator()(const Range& range) const CV_OVERRIDE;
+
+        const Mat& src;
+        const Mat& dst;
+    };
+
+    struct LKTrackerInvoker : ParallelLoopBody
+    {
+        LKTrackerInvoker( const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
+                          const Point2f* _prevPts, Point2f* _nextPts,
+                          uchar* _status, float* _err,
+                          Size _winSize, TermCriteria _criteria,
+                          int _level, int _maxLevel, int _flags, float _minEigThreshold );
+
+        void operator()(const Range& range) const CV_OVERRIDE;
+
+        const Mat* prevImg;
+        const Mat* nextImg;
+        const Mat* prevDeriv;
+        const Point2f* prevPts;
+        Point2f* nextPts;
+        uchar* status;
+        float* err;
+        Size winSize;
+        TermCriteria criteria;
+        int level;
+        int maxLevel;
+        int flags;
+        float minEigThreshold;
+    };
+
+}// namespace detail
+}// namespace cv
--- a/Lib/opencv/sources/modules/video/src/opencl/bgfg_knn.cl
+++ b/Lib/opencv/sources/modules/video/src/opencl/bgfg_knn.cl
@@ -0,0 +1,248 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2018 Ya-Chiu Wu, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Ya-Chiu Wu, yacwu@cs.nctu.edu.tw
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if CN==1
+
+#define T_MEAN float
+#define F_ZERO (0.0f)
+
+#define frameToMean(a, b) (b) = *(a);
+#define meanToFrame(a, b) *b = convert_uchar_sat(a);
+
+#else
+
+#define T_MEAN float4
+#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
+
+#define meanToFrame(a, b)\
+    b[0] = convert_uchar_sat(a.x); \
+    b[1] = convert_uchar_sat(a.y); \
+    b[2] = convert_uchar_sat(a.z);
+
+#define frameToMean(a, b)\
+    b.x = a[0]; \
+    b.y = a[1]; \
+    b.z = a[2]; \
+    b.w = 0.0f;
+
+#endif
+
+__kernel void knn_kernel(__global const uchar* frame, int frame_step, int frame_offset, int frame_row, int frame_col,
+                         __global const uchar* nNextLongUpdate,
+                         __global const uchar* nNextMidUpdate,
+                         __global const uchar* nNextShortUpdate,
+                         __global uchar* aModelIndexLong,
+                         __global uchar* aModelIndexMid,
+                         __global uchar* aModelIndexShort,
+                         __global uchar* flag,
+                         __global uchar* sample,
+                         __global uchar* fgmask, int fgmask_step, int fgmask_offset,
+                         int nLongCounter, int nMidCounter, int nShortCounter,
+                         float c_Tb, int c_nkNN, float c_tau
+#ifdef SHADOW_DETECT
+                         , uchar c_shadowVal
+#endif
+                         )
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if( x < frame_col && y < frame_row)
+    {
+        __global const uchar* _frame = (frame + mad24(y, frame_step, mad24(x, CN, frame_offset)));
+        T_MEAN pix;
+        frameToMean(_frame, pix);
+
+        uchar foreground = 255; // 0 - the pixel classified as background
+
+        int Pbf = 0;
+        int Pb = 0;
+        uchar include = 0;
+
+        int pt_idx =  mad24(y, frame_col, x);
+        int idx_step = frame_row * frame_col;
+
+        __global T_MEAN* _sample = (__global T_MEAN*)(sample);
+
+        for (uchar n = 0; n < (NSAMPLES) * 3 ; ++n)
+        {
+            int n_idx = mad24(n, idx_step, pt_idx);
+
+            T_MEAN c_mean = _sample[n_idx];
+
+            uchar c_flag = flag[n_idx];
+
+            T_MEAN diff = c_mean - pix;
+            float dist2 = dot(diff, diff);
+
+            if (dist2 < c_Tb)
+            {
+                Pbf++;
+                if (c_flag)
+                {
+                    Pb++;
+                    if (Pb >= c_nkNN)
+                    {
+                        include = 1;
+                        foreground = 0;
+                        break;
+                    }
+                }
+            }
+        }
+        if (Pbf >= c_nkNN)
+        {
+            include = 1;
+        }
+
+#ifdef SHADOW_DETECT
+        if (foreground)
+        {
+            int Ps = 0;
+            for (uchar n = 0; n < (NSAMPLES) * 3 ; ++n)
+            {
+                int n_idx = mad24(n, idx_step, pt_idx);
+                uchar c_flag = flag[n_idx];
+
+                if (c_flag)
+                {
+                    T_MEAN c_mean = _sample[n_idx];
+
+                    float numerator = dot(pix, c_mean);
+                    float denominator = dot(c_mean, c_mean);
+
+                    if (denominator == 0)
+                        break;
+
+                    if (numerator <= denominator && numerator >= c_tau * denominator)
+                    {
+                        float a = numerator / denominator;
+
+                        T_MEAN dD = mad(a, c_mean, -pix);
+
+                        if (dot(dD, dD) < c_Tb * a * a)
+                        {
+                            Ps++;
+                            if (Ps >= c_nkNN)
+                            {
+                                foreground = c_shadowVal;
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+#endif
+        __global uchar* _fgmask = fgmask + mad24(y, fgmask_step, x + fgmask_offset);
+        *_fgmask = (uchar)foreground;
+
+        __global const uchar* _nNextLongUpdate = nNextLongUpdate + pt_idx;
+        __global const uchar* _nNextMidUpdate = nNextMidUpdate + pt_idx;
+        __global const uchar* _nNextShortUpdate = nNextShortUpdate + pt_idx;
+        __global uchar* _aModelIndexLong = aModelIndexLong + pt_idx;
+        __global uchar* _aModelIndexMid = aModelIndexMid + pt_idx;
+        __global uchar* _aModelIndexShort = aModelIndexShort + pt_idx;
+
+        uchar nextLongUpdate = _nNextLongUpdate[0];
+        uchar nextMidUpdate = _nNextMidUpdate[0];
+        uchar nextShortUpdate = _nNextShortUpdate[0];
+        uchar modelIndexLong = _aModelIndexLong[0];
+        uchar modelIndexMid = _aModelIndexMid[0];
+        uchar modelIndexShort = _aModelIndexShort[0];
+        int offsetLong = mad24(mad24(2, (NSAMPLES), modelIndexLong), idx_step, pt_idx);
+        int offsetMid = mad24((NSAMPLES)+modelIndexMid, idx_step, pt_idx);
+        int offsetShort = mad24(modelIndexShort, idx_step, pt_idx);
+        if (nextLongUpdate == nLongCounter)
+        {
+            _sample[offsetLong] = _sample[offsetMid];
+            flag[offsetLong] = flag[offsetMid];
+            _aModelIndexLong[0] = (modelIndexLong >= ((NSAMPLES)-1)) ? 0 : (modelIndexLong + 1);
+        }
+
+        if (nextMidUpdate == nMidCounter)
+        {
+            _sample[offsetMid] = _sample[offsetShort];
+            flag[offsetMid] = flag[offsetShort];
+            _aModelIndexMid[0] = (modelIndexMid >= ((NSAMPLES)-1)) ? 0 : (modelIndexMid + 1);
+        }
+
+        if (nextShortUpdate == nShortCounter)
+        {
+            _sample[offsetShort] = pix;
+            flag[offsetShort] = include;
+            _aModelIndexShort[0] = (modelIndexShort >= ((NSAMPLES)-1)) ? 0 : (modelIndexShort + 1);
+        }
+    }
+}
+
+__kernel void getBackgroundImage2_kernel(__global const uchar* flag,
+                                         __global const uchar* sample,
+                                         __global uchar* dst, int dst_step, int dst_offset, int dst_row, int dst_col)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x < dst_col && y < dst_row)
+    {
+        int pt_idx =  mad24(y, dst_col, x);
+
+        T_MEAN meanVal = (T_MEAN)F_ZERO;
+
+        __global T_MEAN* _sample = (__global T_MEAN*)(sample);
+        int idx_step = dst_row * dst_col;
+        for (uchar n = 0; n < (NSAMPLES) * 3 ; ++n)
+        {
+            int n_idx = mad24(n, idx_step, pt_idx);
+            uchar c_flag = flag[n_idx];
+            if(c_flag)
+            {
+                meanVal = _sample[n_idx];
+                break;
+            }
+        }
+        __global uchar* _dst = dst + mad24(y, dst_step, mad24(x, CN, dst_offset));
+        meanToFrame(meanVal, _dst);
+    }
+}
--- a/Lib/opencv/sources/modules/video/src/opencl/bgfg_mog2.cl
+++ b/Lib/opencv/sources/modules/video/src/opencl/bgfg_mog2.cl
@@ -0,0 +1,282 @@
+#if CN==1
+
+#define T_MEAN float
+#define F_ZERO (0.0f)
+#define cnMode 1
+
+#define frameToMean(a, b) (b) = *(a);
+#if FL==0
+#define meanToFrame(a, b) *b = convert_uchar_sat(a);
+#else
+#define meanToFrame(a, b) *b = (float)a;
+#endif
+
+#else
+
+#define T_MEAN float4
+#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
+#define cnMode 4
+
+#if FL == 0
+#define meanToFrame(a, b)\
+    b[0] = convert_uchar_sat(a.x); \
+    b[1] = convert_uchar_sat(a.y); \
+    b[2] = convert_uchar_sat(a.z);
+#else
+#define meanToFrame(a, b)\
+    b[0] = a.x; \
+    b[1] = a.y; \
+    b[2] = a.z;
+#endif
+
+#define frameToMean(a, b)\
+    b.x = a[0]; \
+    b.y = a[1]; \
+    b.z = a[2]; \
+    b.w = 0.0f;
+
+#endif
+
+__kernel void mog2_kernel(__global const uchar* frame, int frame_step, int frame_offset, int frame_row, int frame_col,  //uchar || uchar3
+                          __global uchar* modesUsed,                                                                    //uchar
+                          __global uchar* weight,                                                                       //float
+                          __global uchar* mean,                                                                         //T_MEAN=float || float4
+                          __global uchar* variance,                                                                     //float
+                          __global uchar* fgmask, int fgmask_step, int fgmask_offset,                                   //uchar
+                          float alphaT, float alpha1, float prune,
+                          float c_Tb, float c_TB, float c_Tg, float c_varMin,                                           //constants
+                          float c_varMax, float c_varInit, float c_tau
+#ifdef SHADOW_DETECT
+                          , uchar c_shadowVal
+#endif
+                          )
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if( x < frame_col && y < frame_row)
+    {
+        #if FL==0
+        __global const uchar* _frame = (frame + mad24(y, frame_step, mad24(x, CN, frame_offset)));
+        #else
+        __global const float* _frame = ((__global const float*)( frame + mad24(y, frame_step, frame_offset)) + mad24(x, CN, 0));
+        #endif
+        T_MEAN pix;
+        frameToMean(_frame, pix);
+
+        uchar foreground = 255; // 0 - the pixel classified as background
+
+        bool fitsPDF = false; //if it remains zero a new GMM mode will be added
+
+        int pt_idx =  mad24(y, frame_col, x);
+        int idx_step = frame_row * frame_col;
+
+        __global uchar* _modesUsed = modesUsed + pt_idx;
+        uchar nmodes = _modesUsed[0];
+
+        float totalWeight = 0.0f;
+
+        __global float* _weight = (__global float*)(weight);
+        __global float* _variance = (__global float*)(variance);
+        __global T_MEAN* _mean = (__global T_MEAN*)(mean);
+
+        uchar mode = 0;
+        for (; mode < nmodes; ++mode)
+        {
+            int mode_idx = mad24(mode, idx_step, pt_idx);
+            float c_weight = mad(alpha1, _weight[mode_idx], prune);
+
+            float c_var = _variance[mode_idx];
+
+            T_MEAN c_mean = _mean[mode_idx];
+
+            T_MEAN diff = c_mean - pix;
+            float dist2 = dot(diff, diff);
+
+            if (totalWeight < c_TB && dist2 < c_Tb * c_var)
+                foreground = 0;
+
+            if (dist2 < c_Tg * c_var)
+            {
+                fitsPDF = true;
+                c_weight += alphaT;
+
+                float k = alphaT / c_weight;
+                T_MEAN mean_new = mad((T_MEAN)-k, diff, c_mean);
+                float variance_new  = clamp(mad(k, (dist2 - c_var), c_var), c_varMin, c_varMax);
+
+                for (int i = mode; i > 0; --i)
+                {
+                    int prev_idx = mode_idx - idx_step;
+                    if (c_weight < _weight[prev_idx])
+                        break;
+
+                    _weight[mode_idx]   = _weight[prev_idx];
+                    _variance[mode_idx] = _variance[prev_idx];
+                    _mean[mode_idx]     = _mean[prev_idx];
+
+                    mode_idx = prev_idx;
+                }
+
+                _mean[mode_idx]     = mean_new;
+                _variance[mode_idx] = variance_new;
+                _weight[mode_idx]   = c_weight; //update weight by the calculated value
+
+                totalWeight += c_weight;
+
+                mode ++;
+
+                break;
+            }
+            if (c_weight < -prune)
+                c_weight = 0.0f;
+
+            _weight[mode_idx] = c_weight; //update weight by the calculated value
+            totalWeight += c_weight;
+        }
+
+        for (; mode < nmodes; ++mode)
+        {
+            int mode_idx = mad24(mode, idx_step, pt_idx);
+            float c_weight = mad(alpha1, _weight[mode_idx], prune);
+
+            if (c_weight < -prune)
+            {
+                c_weight = 0.0f;
+                nmodes = mode;
+                break;
+            }
+            _weight[mode_idx] = c_weight; //update weight by the calculated value
+            totalWeight += c_weight;
+        }
+
+        if (0.f < totalWeight)
+        {
+            totalWeight = 1.f / totalWeight;
+            for (int mode = 0; mode < nmodes; ++mode)
+                _weight[mad24(mode, idx_step, pt_idx)] *= totalWeight;
+        }
+
+        if (!fitsPDF)
+        {
+            uchar mode = nmodes == (NMIXTURES) ? (NMIXTURES) - 1 : nmodes++;
+            int mode_idx = mad24(mode, idx_step, pt_idx);
+
+            if (nmodes == 1)
+                _weight[mode_idx] = 1.f;
+            else
+            {
+                _weight[mode_idx] = alphaT;
+
+                for (int i = pt_idx; i < mode_idx; i += idx_step)
+                    _weight[i] *= alpha1;
+            }
+
+            for (int i = nmodes - 1; i > 0; --i)
+            {
+                int prev_idx = mode_idx - idx_step;
+                if (alphaT < _weight[prev_idx])
+                    break;
+
+                _weight[mode_idx]   = _weight[prev_idx];
+                _variance[mode_idx] = _variance[prev_idx];
+                _mean[mode_idx]     = _mean[prev_idx];
+
+                mode_idx = prev_idx;
+            }
+
+            _mean[mode_idx] = pix;
+            _variance[mode_idx] = c_varInit;
+        }
+
+        _modesUsed[0] = nmodes;
+#ifdef SHADOW_DETECT
+        if (foreground)
+        {
+            float tWeight = 0.0f;
+
+            for (uchar mode = 0; mode < nmodes; ++mode)
+            {
+                int mode_idx = mad24(mode, idx_step, pt_idx);
+                T_MEAN c_mean = _mean[mode_idx];
+
+                float numerator = dot(pix, c_mean);
+                float denominator = dot(c_mean, c_mean);
+
+                if (denominator == 0)
+                    break;
+
+                if (numerator <= denominator && numerator >= c_tau * denominator)
+                {
+                    float a = numerator / denominator;
+
+                    T_MEAN dD = mad(a, c_mean, -pix);
+
+                    if (dot(dD, dD) < c_Tb * _variance[mode_idx] * a * a)
+                    {
+                        foreground = c_shadowVal;
+                        break;
+                    }
+                }
+
+                tWeight += _weight[mode_idx];
+                if (tWeight > c_TB)
+                    break;
+            }
+        }
+#endif
+        __global uchar* _fgmask = fgmask + mad24(y, fgmask_step, x + fgmask_offset);
+        *_fgmask = (uchar)foreground;
+    }
+}
+
+__kernel void getBackgroundImage2_kernel(__global const uchar* modesUsed,
+                                         __global const uchar* weight,
+                                         __global const uchar* mean,
+                                         __global uchar* dst, int dst_step, int dst_offset, int dst_row, int dst_col,
+                                         float c_TB)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if(x < dst_col && y < dst_row)
+    {
+        int pt_idx =  mad24(y, dst_col, x);
+
+        __global const uchar* _modesUsed = modesUsed + pt_idx;
+        uchar nmodes = _modesUsed[0];
+
+        T_MEAN meanVal = (T_MEAN)F_ZERO;
+
+        float totalWeight = 0.0f;
+        __global const float* _weight = (__global const float*)weight;
+        __global const T_MEAN* _mean = (__global const T_MEAN*)(mean);
+        int idx_step = dst_row * dst_col;
+        for (uchar mode = 0; mode < nmodes; ++mode)
+        {
+            int mode_idx = mad24(mode, idx_step, pt_idx);
+            float c_weight = _weight[mode_idx];
+            T_MEAN c_mean = _mean[mode_idx];
+
+            meanVal = mad(c_weight, c_mean, meanVal);
+
+            totalWeight += c_weight;
+
+            if (totalWeight > c_TB)
+                break;
+        }
+
+        if (0.f < totalWeight)
+            meanVal = meanVal / totalWeight;
+        else
+            meanVal = (T_MEAN)(0.f);
+
+        #if FL==0
+        __global uchar* _dst = dst + mad24(y, dst_step, mad24(x, CN, dst_offset));
+        meanToFrame(meanVal, _dst);
+        #else
+        __global float* _dst = ((__global float*)( dst + mad24(y, dst_step, dst_offset)) + mad24(x, CN, 0));
+        meanToFrame(meanVal, _dst);
+        #endif
+    }
+}
--- a/Lib/opencv/sources/modules/video/src/opencl/dis_flow.cl
+++ b/Lib/opencv/sources/modules/video/src/opencl/dis_flow.cl
@@ -0,0 +1,540 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+//#define CV_USE_SUBGROUPS
+
+#define EPS 0.001f
+#define INF 1E+10F
+
+//#define DIS_BORDER_SIZE xxx
+//#define DIS_PATCH_SIZE xxx
+//#define DIS_PATCH_STRIDE xxx
+
+#define DIS_PATCH_SIZE_HALF (DIS_PATCH_SIZE / 2)
+
+#ifndef DIS_BORDER_SIZE
+
+__kernel void dis_precomputeStructureTensor_hor(__global const short *I0x,
+                                                __global const short *I0y,
+                                                int w, int h, int ws,
+                                                __global float *I0xx_aux_ptr,
+                                                __global float *I0yy_aux_ptr,
+                                                __global float *I0xy_aux_ptr,
+                                                __global float *I0x_aux_ptr,
+                                                __global float *I0y_aux_ptr)
+{
+
+    int i = get_global_id(0);
+
+    if (i >= h) return;
+
+    const __global short *x_row = I0x + i * w;
+    const __global short *y_row = I0y + i * w;
+
+    float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f, sum_x = 0.0f, sum_y = 0.0f;
+    float8 x_vec = convert_float8(vload8(0, x_row));
+    float8 y_vec = convert_float8(vload8(0, y_row));
+    sum_xx = dot(x_vec.lo, x_vec.lo) + dot(x_vec.hi, x_vec.hi);
+    sum_yy = dot(y_vec.lo, y_vec.lo) + dot(y_vec.hi, y_vec.hi);
+    sum_xy = dot(x_vec.lo, y_vec.lo) + dot(x_vec.hi, y_vec.hi);
+    sum_x = dot(x_vec.lo, 1.0f) + dot(x_vec.hi, 1.0f);
+    sum_y = dot(y_vec.lo, 1.0f) + dot(y_vec.hi, 1.0f);
+
+    I0xx_aux_ptr[i * ws] = sum_xx;
+    I0yy_aux_ptr[i * ws] = sum_yy;
+    I0xy_aux_ptr[i * ws] = sum_xy;
+    I0x_aux_ptr[i * ws] = sum_x;
+    I0y_aux_ptr[i * ws] = sum_y;
+
+    int js = 1;
+    for (int j = DIS_PATCH_SIZE; j < w; j++)
+    {
+        short x_val1 = x_row[j];
+        short x_val2 = x_row[j - DIS_PATCH_SIZE];
+        short y_val1 = y_row[j];
+        short y_val2 = y_row[j - DIS_PATCH_SIZE];
+        sum_xx += (x_val1 * x_val1 - x_val2 * x_val2);
+        sum_yy += (y_val1 * y_val1 - y_val2 * y_val2);
+        sum_xy += (x_val1 * y_val1 - x_val2 * y_val2);
+        sum_x += (x_val1 - x_val2);
+        sum_y += (y_val1 - y_val2);
+        if ((j - DIS_PATCH_SIZE + 1) % DIS_PATCH_STRIDE == 0)
+        {
+            int index = i * ws + js;
+            I0xx_aux_ptr[index] = sum_xx;
+            I0yy_aux_ptr[index] = sum_yy;
+            I0xy_aux_ptr[index] = sum_xy;
+            I0x_aux_ptr[index] = sum_x;
+            I0y_aux_ptr[index] = sum_y;
+            js++;
+        }
+    }
+}
+
+__kernel void dis_precomputeStructureTensor_ver(__global const float *I0xx_aux_ptr,
+                                                __global const float *I0yy_aux_ptr,
+                                                __global const float *I0xy_aux_ptr,
+                                                __global const float *I0x_aux_ptr,
+                                                __global const float *I0y_aux_ptr,
+                                                int w, int h, int ws,
+                                                __global float *I0xx_ptr,
+                                                __global float *I0yy_ptr,
+                                                __global float *I0xy_ptr,
+                                                __global float *I0x_ptr,
+                                                __global float *I0y_ptr)
+{
+    int j = get_global_id(0);
+
+    if (j >= ws) return;
+
+    float sum_xx, sum_yy, sum_xy, sum_x, sum_y;
+    sum_xx = sum_yy = sum_xy = sum_x = sum_y = 0.0f;
+
+    for (int i = 0; i < DIS_PATCH_SIZE; i++)
+    {
+        sum_xx += I0xx_aux_ptr[i * ws + j];
+        sum_yy += I0yy_aux_ptr[i * ws + j];
+        sum_xy += I0xy_aux_ptr[i * ws + j];
+        sum_x  += I0x_aux_ptr[i * ws + j];
+        sum_y  += I0y_aux_ptr[i * ws + j];
+    }
+    I0xx_ptr[j] = sum_xx;
+    I0yy_ptr[j] = sum_yy;
+    I0xy_ptr[j] = sum_xy;
+    I0x_ptr[j] = sum_x;
+    I0y_ptr[j] = sum_y;
+
+    int is = 1;
+    for (int i = DIS_PATCH_SIZE; i < h; i++)
+    {
+        sum_xx += (I0xx_aux_ptr[i * ws + j] - I0xx_aux_ptr[(i - DIS_PATCH_SIZE) * ws + j]);
+        sum_yy += (I0yy_aux_ptr[i * ws + j] - I0yy_aux_ptr[(i - DIS_PATCH_SIZE) * ws + j]);
+        sum_xy += (I0xy_aux_ptr[i * ws + j] - I0xy_aux_ptr[(i - DIS_PATCH_SIZE) * ws + j]);
+        sum_x  += (I0x_aux_ptr[i * ws + j] - I0x_aux_ptr[(i - DIS_PATCH_SIZE) * ws + j]);
+        sum_y  += (I0y_aux_ptr[i * ws + j] - I0y_aux_ptr[(i - DIS_PATCH_SIZE) * ws + j]);
+
+        if ((i - DIS_PATCH_SIZE + 1) % DIS_PATCH_STRIDE == 0)
+        {
+            I0xx_ptr[is * ws + j] = sum_xx;
+            I0yy_ptr[is * ws + j] = sum_yy;
+            I0xy_ptr[is * ws + j] = sum_xy;
+            I0x_ptr[is * ws + j] = sum_x;
+            I0y_ptr[is * ws + j] = sum_y;
+            is++;
+        }
+    }
+}
+
+__kernel void dis_densification(__global const float2 *S_ptr,
+                                __global const uchar *i0, __global const uchar *i1,
+                                int w, int h, int ws,
+                                __global float2 *U_ptr)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+    int i, j;
+
+    if (x >= w || y >= h) return;
+
+    int start_is, end_is;
+    int start_js, end_js;
+
+    end_is = min(y / DIS_PATCH_STRIDE, (h - DIS_PATCH_SIZE) / DIS_PATCH_STRIDE);
+    start_is = max(0, y - DIS_PATCH_SIZE + DIS_PATCH_STRIDE) / DIS_PATCH_STRIDE;
+    start_is = min(start_is, end_is);
+
+    end_js = min(x / DIS_PATCH_STRIDE, (w - DIS_PATCH_SIZE) / DIS_PATCH_STRIDE);
+    start_js = max(0, x - DIS_PATCH_SIZE + DIS_PATCH_STRIDE) / DIS_PATCH_STRIDE;
+    start_js = min(start_js, end_js);
+
+    float sum_coef = 0.0f;
+    float2 sum_U = (float2)(0.0f, 0.0f);
+
+    int i_l, i_u;
+    int j_l, j_u;
+    float i_m, j_m, diff;
+
+    i = y;
+    j = x;
+
+    /* Iterate through all the patches that overlap the current location (i,j) */
+    for (int is = start_is; is <= end_is; is++)
+        for (int js = start_js; js <= end_js; js++)
+        {
+            float2 s_val = S_ptr[is * ws + js];
+            uchar2 i1_vec1, i1_vec2;
+
+            j_m = min(max(j + s_val.x, 0.0f), w - 1.0f - EPS);
+            i_m = min(max(i + s_val.y, 0.0f), h - 1.0f - EPS);
+            j_l = (int)j_m;
+            j_u = j_l + 1;
+            i_l = (int)i_m;
+            i_u = i_l + 1;
+            i1_vec1 = vload2(0, i1 + i_u * w + j_l);
+            i1_vec2 = vload2(0, i1 + i_l * w + j_l);
+            diff = (j_m - j_l) * (i_m - i_l) * i1_vec1.y +
+                   (j_u - j_m) * (i_m - i_l) * i1_vec1.x +
+                   (j_m - j_l) * (i_u - i_m) * i1_vec2.y +
+                   (j_u - j_m) * (i_u - i_m) * i1_vec2.x - i0[i * w + j];
+            float coef = 1.0f / max(1.0f, fabs(diff));
+            sum_U += coef * s_val;
+            sum_coef += coef;
+        }
+
+    float inv_sum_coef = 1.0 / sum_coef;
+    U_ptr[i * w + j] = sum_U * inv_sum_coef;
+}
+
+#else // DIS_BORDER_SIZE
+
+#define INIT_BILINEAR_WEIGHTS(Ux, Uy) \
+    i_I1 = clamp(i + Uy + DIS_BORDER_SIZE, i_lower_limit, i_upper_limit); \
+    j_I1 = clamp(j + Ux + DIS_BORDER_SIZE, j_lower_limit, j_upper_limit); \
+    { \
+        float di = i_I1 - floor(i_I1); \
+        float dj = j_I1 - floor(j_I1); \
+        w11 = di       * dj; \
+        w10 = di       * (1 - dj); \
+        w01 = (1 - di) * dj; \
+        w00 = (1 - di) * (1 - dj); \
+    }
+
+float computeSSDMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr,
+                         int I0_stride, int I1_stride,
+                         float w00, float w01, float w10, float w11, int i
+#ifndef CV_USE_SUBGROUPS
+                         , __local float2 *smem /*[8]*/
+#endif
+)
+{
+    float sum_diff = 0.0f, sum_diff_sq = 0.0f;
+    int n = DIS_PATCH_SIZE * DIS_PATCH_SIZE;
+
+    uchar8 I1_vec1, I1_vec2, I0_vec;
+    uchar I1_val1, I1_val2;
+
+    I0_vec  = vload8(0, I0_ptr + i * I0_stride);
+    I1_vec1 = vload8(0, I1_ptr + i * I1_stride);
+    I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride);
+    I1_val1 = I1_ptr[i * I1_stride + 8];
+    I1_val2 = I1_ptr[(i + 1) * I1_stride + 8];
+
+    float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) +
+                 w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) -
+                 convert_float8(I0_vec);
+
+    sum_diff = (dot(vec.lo, 1.0) + dot(vec.hi, 1.0));
+    sum_diff_sq = (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi));
+
+#ifdef CV_USE_SUBGROUPS
+    sum_diff = sub_group_reduce_add(sum_diff);
+    sum_diff_sq = sub_group_reduce_add(sum_diff_sq);
+#else
+    barrier(CLK_LOCAL_MEM_FENCE);
+    smem[i] = (float2)(sum_diff, sum_diff_sq);
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (i < 4)
+        smem[i] += smem[i + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (i < 2)
+        smem[i] += smem[i + 2];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (i == 0)
+        smem[0] += smem[1];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    float2 reduce_add_result = smem[0];
+    sum_diff = reduce_add_result.x;
+    sum_diff_sq = reduce_add_result.y;
+#endif
+
+    return sum_diff_sq - sum_diff * sum_diff / n;
+}
+
+__attribute__((reqd_work_group_size(8, 1, 1)))
+__kernel void dis_patch_inverse_search_fwd_1(__global const float2 *U_ptr,
+                                             __global const uchar *I0_ptr, __global const uchar *I1_ptr,
+                                             int w, int h, int ws, int hs,
+                                             __global float2 *S_ptr)
+{
+    int id = get_global_id(0);
+    int is = get_group_id(0);
+
+    int i = is * DIS_PATCH_STRIDE;
+    int j = 0;
+    int w_ext = w + 2 * DIS_BORDER_SIZE;
+
+    float i_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
+    float i_upper_limit = DIS_BORDER_SIZE + h - 1.0f;
+    float j_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
+    float j_upper_limit = DIS_BORDER_SIZE + w - 1.0f;
+
+    float2 prev_U = U_ptr[(i + DIS_PATCH_SIZE_HALF) * w + j + DIS_PATCH_SIZE_HALF];
+    S_ptr[is * ws] = prev_U;
+    j += DIS_PATCH_STRIDE;
+
+#ifdef CV_USE_SUBGROUPS
+    int sid = get_sub_group_local_id();
+#define EXTRA_ARGS_computeSSDMeanNorm sid
+#else
+    __local float2 smem[8];
+    int sid = get_local_id(0);
+#define EXTRA_ARGS_computeSSDMeanNorm sid, smem
+#endif
+    for (int js = 1; js < ws; js++, j += DIS_PATCH_STRIDE)
+    {
+        float2 U = U_ptr[(i + DIS_PATCH_SIZE_HALF) * w + j + DIS_PATCH_SIZE_HALF];
+
+        float i_I1, j_I1, w00, w01, w10, w11;
+
+        INIT_BILINEAR_WEIGHTS(U.x, U.y);
+        float min_SSD = computeSSDMeanNorm(
+                I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
+                w, w_ext, w00, w01, w10, w11, EXTRA_ARGS_computeSSDMeanNorm);
+
+        INIT_BILINEAR_WEIGHTS(prev_U.x, prev_U.y);
+        float cur_SSD = computeSSDMeanNorm(
+                I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
+                w, w_ext, w00, w01, w10, w11, EXTRA_ARGS_computeSSDMeanNorm);
+
+        prev_U = (cur_SSD < min_SSD) ? prev_U : U;
+        S_ptr[is * ws + js] = prev_U;
+    }
+
+#undef EXTRA_ARGS_computeSSDMeanNorm
+}
+#endif // DIS_BORDER_SIZE
+
+float4 processPatchMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr,
+                            const __global short *I0x_ptr, const __global short *I0y_ptr,
+                            int I0_stride, int I1_stride, float w00, float w01, float w10,
+                            float w11, float x_grad_sum, float y_grad_sum)
+{
+    const float inv_n = 1.0f / (float)(DIS_PATCH_SIZE * DIS_PATCH_SIZE);
+
+    float sum_diff = 0.0, sum_diff_sq = 0.0;
+    float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0;
+
+    uchar8 I1_vec1;
+    uchar8 I1_vec2 = vload8(0, I1_ptr);
+    uchar I1_val1;
+    uchar I1_val2 = I1_ptr[DIS_PATCH_SIZE];
+
+    for (int i = 0; i < 8; i++)
+    {
+        uchar8 I0_vec = vload8(0, I0_ptr + i * I0_stride);
+
+        I1_vec1 = I1_vec2;
+        I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride);
+        I1_val1 = I1_val2;
+        I1_val2 = I1_ptr[(i + 1) * I1_stride + DIS_PATCH_SIZE];
+
+        float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) +
+                     w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) -
+                     convert_float8(I0_vec);
+
+        sum_diff += (dot(vec.lo, 1.0) + dot(vec.hi, 1.0));
+        sum_diff_sq += (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi));
+
+        short8 I0x_vec = vload8(0, I0x_ptr + i * I0_stride);
+        short8 I0y_vec = vload8(0, I0y_ptr + i * I0_stride);
+
+        sum_I0x_mul += dot(vec.lo, convert_float4(I0x_vec.lo));
+        sum_I0x_mul += dot(vec.hi, convert_float4(I0x_vec.hi));
+        sum_I0y_mul += dot(vec.lo, convert_float4(I0y_vec.lo));
+        sum_I0y_mul += dot(vec.hi, convert_float4(I0y_vec.hi));
+    }
+
+    float dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum * inv_n;
+    float dst_dUy = sum_I0y_mul - sum_diff * y_grad_sum * inv_n;
+    float SSD = sum_diff_sq - sum_diff * sum_diff * inv_n;
+
+    return (float4)(SSD, dst_dUx, dst_dUy, 0);
+}
+
+#ifdef DIS_BORDER_SIZE
+__kernel void dis_patch_inverse_search_fwd_2(__global const float2 *U_ptr,
+                                             __global const uchar *I0_ptr, __global const uchar *I1_ptr,
+                                             __global const short *I0x_ptr, __global const short *I0y_ptr,
+                                             __global const float *xx_ptr, __global const float *yy_ptr,
+                                             __global const float *xy_ptr,
+                                             __global const float *x_ptr, __global const float *y_ptr,
+                                             int w, int h, int ws, int hs, int num_inner_iter,
+                                             __global float2 *S_ptr)
+{
+    int js = get_global_id(0);
+    int is = get_global_id(1);
+    int i = is * DIS_PATCH_STRIDE;
+    int j = js * DIS_PATCH_STRIDE;
+    const int psz = DIS_PATCH_SIZE;
+    int w_ext = w + 2 * DIS_BORDER_SIZE;
+    int index = is * ws + js;
+
+    if (js >= ws || is >= hs) return;
+
+    float2 U0 = S_ptr[index];
+    float2 cur_U = U0;
+    float cur_xx = xx_ptr[index];
+    float cur_yy = yy_ptr[index];
+    float cur_xy = xy_ptr[index];
+    float detH = cur_xx * cur_yy - cur_xy * cur_xy;
+
+    float inv_detH = (fabs(detH) < EPS) ? 1.0 / EPS : 1.0 / detH;
+    float invH11 = cur_yy * inv_detH;
+    float invH12 = -cur_xy * inv_detH;
+    float invH22 = cur_xx * inv_detH;
+
+    float prev_SSD = INF;
+    float x_grad_sum = x_ptr[index];
+    float y_grad_sum = y_ptr[index];
+
+    const float i_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
+    const float i_upper_limit = DIS_BORDER_SIZE + h - 1.0f;
+    const float j_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
+    const float j_upper_limit = DIS_BORDER_SIZE + w - 1.0f;
+
+    for (int t = 0; t < num_inner_iter; t++)
+    {
+        float i_I1, j_I1, w00, w01, w10, w11;
+        INIT_BILINEAR_WEIGHTS(cur_U.x, cur_U.y);
+        float4 res = processPatchMeanNorm(
+                I0_ptr  + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
+                I0x_ptr + i * w + j, I0y_ptr + i * w + j,
+                w, w_ext, w00, w01, w10, w11,
+                x_grad_sum, y_grad_sum);
+
+        float SSD = res.x;
+        float dUx = res.y;
+        float dUy = res.z;
+        float dx = invH11 * dUx + invH12 * dUy;
+        float dy = invH12 * dUx + invH22 * dUy;
+
+        cur_U -= (float2)(dx, dy);
+
+        if (SSD >= prev_SSD)
+            break;
+        prev_SSD = SSD;
+    }
+
+    float2 vec = cur_U - U0;
+    S_ptr[index] = (dot(vec, vec) <= (float)(DIS_PATCH_SIZE * DIS_PATCH_SIZE)) ? cur_U : U0;
+}
+
+__attribute__((reqd_work_group_size(8, 1, 1)))
+__kernel void dis_patch_inverse_search_bwd_1(__global const uchar *I0_ptr, __global const uchar *I1_ptr,
+                                             int w, int h, int ws, int hs,
+                                             __global float2 *S_ptr)
+{
+    int id = get_global_id(0);
+    int is = get_group_id(0);
+
+    is = (hs - 1 - is);
+    int i = is * DIS_PATCH_STRIDE;
+    int j = (ws - 2) * DIS_PATCH_STRIDE;
+    const int w_ext = w + 2 * DIS_BORDER_SIZE;
+
+    const float i_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
+    const float i_upper_limit = DIS_BORDER_SIZE + h - 1.0f;
+    const float j_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
+    const float j_upper_limit = DIS_BORDER_SIZE + w - 1.0f;
+
+#ifdef CV_USE_SUBGROUPS
+    int sid = get_sub_group_local_id();
+#define EXTRA_ARGS_computeSSDMeanNorm sid
+#else
+    __local float2 smem[8];
+    int sid = get_local_id(0);
+#define EXTRA_ARGS_computeSSDMeanNorm sid, smem
+#endif
+
+    for (int js = (ws - 2); js > -1; js--, j -= DIS_PATCH_STRIDE)
+    {
+        float2 U0 = S_ptr[is * ws + js];
+        float2 U1 = S_ptr[is * ws + js + 1];
+
+        float i_I1, j_I1, w00, w01, w10, w11;
+
+        INIT_BILINEAR_WEIGHTS(U0.x, U0.y);
+        float min_SSD = computeSSDMeanNorm(
+                I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
+                w, w_ext, w00, w01, w10, w11, EXTRA_ARGS_computeSSDMeanNorm);
+
+        INIT_BILINEAR_WEIGHTS(U1.x, U1.y);
+        float cur_SSD = computeSSDMeanNorm(
+                I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
+                w, w_ext, w00, w01, w10, w11, EXTRA_ARGS_computeSSDMeanNorm);
+
+        S_ptr[is * ws + js] = (cur_SSD < min_SSD) ? U1 : U0;
+    }
+
+#undef EXTRA_ARGS_computeSSDMeanNorm
+}
+
+__kernel void dis_patch_inverse_search_bwd_2(__global const uchar *I0_ptr, __global const uchar *I1_ptr,
+                                             __global const short *I0x_ptr, __global const short *I0y_ptr,
+                                             __global const float *xx_ptr, __global const float *yy_ptr,
+                                             __global const float *xy_ptr,
+                                             __global const float *x_ptr, __global const float *y_ptr,
+                                             int w, int h, int ws, int hs, int num_inner_iter,
+                                             __global float2 *S_ptr)
+{
+    int js = get_global_id(0);
+    int is = get_global_id(1);
+    if (js >= ws || is >= hs) return;
+
+    js = (ws - 1 - js);
+    is = (hs - 1 - is);
+
+    int j = js * DIS_PATCH_STRIDE;
+    int i = is * DIS_PATCH_STRIDE;
+    int w_ext = w + 2 * DIS_BORDER_SIZE;
+    int index = is * ws + js;
+
+    float2 U0 = S_ptr[index];
+    float2 cur_U = U0;
+    float cur_xx = xx_ptr[index];
+    float cur_yy = yy_ptr[index];
+    float cur_xy = xy_ptr[index];
+    float detH = cur_xx * cur_yy - cur_xy * cur_xy;
+
+    float inv_detH = (fabs(detH) < EPS) ? 1.0 / EPS : 1.0 / detH;
+    float invH11 = cur_yy * inv_detH;
+    float invH12 = -cur_xy * inv_detH;
+    float invH22 = cur_xx * inv_detH;
+
+    float prev_SSD = INF;
+    float x_grad_sum = x_ptr[index];
+    float y_grad_sum = y_ptr[index];
+
+    const float i_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
+    const float i_upper_limit = DIS_BORDER_SIZE + h - 1.0f;
+    const float j_lower_limit = DIS_BORDER_SIZE - DIS_PATCH_SIZE + 1.0f;
+    const float j_upper_limit = DIS_BORDER_SIZE + w - 1.0f;
+
+    for (int t = 0; t < num_inner_iter; t++)
+    {
+        float i_I1, j_I1, w00, w01, w10, w11;
+        INIT_BILINEAR_WEIGHTS(cur_U.x, cur_U.y);
+        float4 res = processPatchMeanNorm(
+                I0_ptr  + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1,
+                I0x_ptr + i * w + j, I0y_ptr + i * w + j,
+                w, w_ext, w00, w01, w10, w11,
+                x_grad_sum, y_grad_sum);
+
+        float SSD = res.x;
+        float dUx = res.y;
+        float dUy = res.z;
+        float dx = invH11 * dUx + invH12 * dUy;
+        float dy = invH12 * dUx + invH22 * dUy;
+
+        cur_U -= (float2)(dx, dy);
+
+        if (SSD >= prev_SSD)
+            break;
+        prev_SSD = SSD;
+    }
+
+    float2 vec = cur_U - U0;
+    S_ptr[index] = ((dot(vec, vec)) <= (float)(DIS_PATCH_SIZE * DIS_PATCH_SIZE)) ? cur_U : U0;
+}
+#endif // DIS_BORDER_SIZE
--- a/Lib/opencv/sources/modules/video/src/opencl/optical_flow_farneback.cl
+++ b/Lib/opencv/sources/modules/video/src/opencl/optical_flow_farneback.cl
@@ -0,0 +1,429 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Sen Liu, swjtuls1987@126.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#define tx  (int)get_local_id(0)
+#define ty  get_local_id(1)
+#define bx  get_group_id(0)
+#define bdx (int)get_local_size(0)
+
+#define BORDER_SIZE 5
+#define MAX_KSIZE_HALF 100
+
+#ifndef polyN
+#define polyN 5
+#endif
+
+#if USE_DOUBLE
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#define TYPE double
+#define VECTYPE double4
+#else
+#define TYPE float
+#define VECTYPE float4
+#endif
+
+__kernel void polynomialExpansion(__global __const float * src, int srcStep,
+                                  __global float * dst, int dstStep,
+                                  const int rows, const  int cols,
+                                  __global __const float * c_g,
+                                  __global __const float * c_xg,
+                                  __global __const float * c_xxg,
+                                  __local float * smem,
+                                  const VECTYPE ig)
+{
+    const int y = get_global_id(1);
+    const int x = bx * (bdx - 2*polyN) + tx - polyN;
+
+    int xWarped;
+    __local float *row = smem + tx;
+
+    if (y < rows && y >= 0)
+    {
+        xWarped = min(max(x, 0), cols - 1);
+
+        row[0] = src[mad24(y, srcStep, xWarped)] * c_g[0];
+        row[bdx] = 0.f;
+        row[2*bdx] = 0.f;
+
+#pragma unroll
+        for (int k = 1; k <= polyN; ++k)
+        {
+            float t0 = src[mad24(max(y - k, 0), srcStep, xWarped)];
+            float t1 = src[mad24(min(y + k, rows - 1), srcStep, xWarped)];
+
+            row[0] += c_g[k] * (t0 + t1);
+            row[bdx] += c_xg[k] * (t1 - t0);
+            row[2*bdx] += c_xxg[k] * (t0 + t1);
+        }
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (y < rows && y >= 0 && tx >= polyN && tx + polyN < bdx && x < cols)
+    {
+        TYPE b1 = c_g[0] * row[0];
+        TYPE b3 = c_g[0] * row[bdx];
+        TYPE b5 = c_g[0] * row[2*bdx];
+        TYPE b2 = 0, b4 = 0, b6 = 0;
+
+#pragma unroll
+        for (int k = 1; k <= polyN; ++k)
+        {
+            b1 += (row[k] + row[-k]) * c_g[k];
+            b4 += (row[k] + row[-k]) * c_xxg[k];
+            b2 += (row[k] - row[-k]) * c_xg[k];
+            b3 += (row[k + bdx] + row[-k + bdx]) * c_g[k];
+            b6 += (row[k + bdx] - row[-k + bdx]) * c_xg[k];
+            b5 += (row[k + 2*bdx] + row[-k + 2*bdx]) * c_g[k];
+        }
+
+        dst[mad24(y, dstStep, xWarped)] = (float)(b3*ig.s0);
+        dst[mad24(rows + y, dstStep, xWarped)] = (float)(b2*ig.s0);
+        dst[mad24(2*rows + y, dstStep, xWarped)] = (float)(b1*ig.s1 + b5*ig.s2);
+        dst[mad24(3*rows + y, dstStep, xWarped)] = (float)(b1*ig.s1 + b4*ig.s2);
+        dst[mad24(4*rows + y, dstStep, xWarped)] = (float)(b6*ig.s3);
+    }
+}
+
+inline int idx_row_low(const int y, const int last_row)
+{
+    return abs(y) % (last_row + 1);
+}
+
+inline int idx_row_high(const int y, const int last_row)
+{
+    return abs(last_row - abs(last_row - y)) % (last_row + 1);
+}
+
+inline int idx_col_low(const int x, const int last_col)
+{
+    return abs(x) % (last_col + 1);
+}
+
+inline int idx_col_high(const int x, const int last_col)
+{
+    return abs(last_col - abs(last_col - x)) % (last_col + 1);
+}
+
+inline int idx_col(const int x, const int last_col)
+{
+    return idx_col_low(idx_col_high(x, last_col), last_col);
+}
+
+__kernel void gaussianBlur(__global const float * src, int srcStep,
+                           __global float * dst, int dstStep, const int rows, const  int cols,
+                           __global const float * c_gKer, const int ksizeHalf,
+                           __local float * smem)
+{
+    const int y = get_global_id(1);
+    const int x = get_global_id(0);
+
+    __local float *row = smem + ty * (bdx + 2*ksizeHalf);
+
+    if (y < rows)
+    {
+        // Vertical pass
+        for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
+        {
+            int xExt = (int)(bx * bdx) + i - ksizeHalf;
+            xExt = idx_col(xExt, cols - 1);
+            row[i] = src[mad24(y, srcStep, xExt)] * c_gKer[0];
+            for (int j = 1; j <= ksizeHalf; ++j)
+                row[i] += (src[mad24(idx_row_low(y - j, rows - 1), srcStep, xExt)]
+                           + src[mad24(idx_row_high(y + j, rows - 1), srcStep, xExt)]) * c_gKer[j];
+        }
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (y < rows && y >= 0 && x < cols && x >= 0)
+    {
+        // Horizontal pass
+        row += tx + ksizeHalf;
+        float res = row[0] * c_gKer[0];
+        for (int i = 1; i <= ksizeHalf; ++i)
+            res += (row[-i] + row[i]) * c_gKer[i];
+
+        dst[mad24(y, dstStep, x)] = res;
+    }
+}
+
+__kernel void gaussianBlur5(__global const float * src, int srcStep,
+                            __global float * dst, int dstStep,
+                            const int rows, const  int cols,
+                            __global const float * c_gKer, const int ksizeHalf,
+                            __local float * smem)
+{
+    const int y = get_global_id(1);
+    const int x = get_global_id(0);
+
+    const int smw = bdx + 2*ksizeHalf; // shared memory "cols"
+    __local volatile float *row = smem + 5 * ty * smw;
+
+    if (y < rows)
+    {
+        // Vertical pass
+        for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
+        {
+            int xExt = (int)(bx * bdx) + i - ksizeHalf;
+            xExt = idx_col(xExt, cols - 1);
+
+#pragma unroll
+            for (int k = 0; k < 5; ++k)
+                row[k*smw + i] = src[mad24(k*rows + y, srcStep, xExt)] * c_gKer[0];
+
+            for (int j = 1; j <= ksizeHalf; ++j)
+#pragma unroll
+                for (int k = 0; k < 5; ++k)
+                    row[k*smw + i] +=
+                        (src[mad24(k*rows + idx_row_low(y - j, rows - 1), srcStep, xExt)] +
+                         src[mad24(k*rows + idx_row_high(y + j, rows - 1), srcStep, xExt)]) * c_gKer[j];
+        }
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (y < rows && y >= 0 && x < cols && x >= 0)
+    {
+        // Horizontal pass
+
+        row += tx + ksizeHalf;
+        float res[5];
+
+#pragma unroll
+        for (int k = 0; k < 5; ++k)
+            res[k] = row[k*smw] * c_gKer[0];
+
+        for (int i = 1; i <= ksizeHalf; ++i)
+#pragma unroll
+            for (int k = 0; k < 5; ++k)
+                res[k] += (row[k*smw - i] + row[k*smw + i]) * c_gKer[i];
+
+#pragma unroll
+        for (int k = 0; k < 5; ++k)
+            dst[mad24(k*rows + y, dstStep, x)] = res[k];
+    }
+}
+__constant float c_border[BORDER_SIZE + 1] = { 0.14f, 0.14f, 0.4472f, 0.4472f, 0.4472f, 1.f };
+
+__kernel void updateMatrices(__global const float * flowx, int xStep,
+                             __global const float * flowy, int yStep,
+                             const int rows, const int cols,
+                             __global const float * R0, int R0Step,
+                             __global const float * R1, int R1Step,
+                             __global float * M, int mStep)
+{
+    const int y = get_global_id(1);
+    const int x = get_global_id(0);
+
+    if (y < rows && y >= 0 && x < cols && x >= 0)
+    {
+        float dx = flowx[mad24(y, xStep, x)];
+        float dy = flowy[mad24(y, yStep, x)];
+        float fx = x + dx;
+        float fy = y + dy;
+
+        int x1 = convert_int(floor(fx));
+        int y1 = convert_int(floor(fy));
+        fx -= x1;
+        fy -= y1;
+
+        float r2, r3, r4, r5, r6;
+
+        if (x1 >= 0 && y1 >= 0 && x1 < cols - 1 && y1 < rows - 1)
+        {
+            float a00 = (1.f - fx) * (1.f - fy);
+            float a01 = fx * (1.f - fy);
+            float a10 = (1.f - fx) * fy;
+            float a11 = fx * fy;
+
+            r2 = a00 * R1[mad24(y1, R1Step, x1)] +
+                 a01 * R1[mad24(y1, R1Step, x1 + 1)] +
+                 a10 * R1[mad24(y1 + 1, R1Step, x1)] +
+                 a11 * R1[mad24(y1 + 1, R1Step, x1 + 1)];
+
+            r3 = a00 * R1[mad24(rows + y1, R1Step, x1)] +
+                 a01 * R1[mad24(rows + y1, R1Step, x1 + 1)] +
+                 a10 * R1[mad24(rows + y1 + 1, R1Step, x1)] +
+                 a11 * R1[mad24(rows + y1 + 1, R1Step, x1 + 1)];
+
+            r4 = a00 * R1[mad24(2*rows + y1, R1Step, x1)] +
+                 a01 * R1[mad24(2*rows + y1, R1Step, x1 + 1)] +
+                 a10 * R1[mad24(2*rows + y1 + 1, R1Step, x1)] +
+                 a11 * R1[mad24(2*rows + y1 + 1, R1Step, x1 + 1)];
+
+            r5 = a00 * R1[mad24(3*rows + y1, R1Step, x1)] +
+                 a01 * R1[mad24(3*rows + y1, R1Step, x1 + 1)] +
+                 a10 * R1[mad24(3*rows + y1 + 1, R1Step, x1)] +
+                 a11 * R1[mad24(3*rows + y1 + 1, R1Step, x1 + 1)];
+
+            r6 = a00 * R1[mad24(4*rows + y1, R1Step, x1)] +
+                 a01 * R1[mad24(4*rows + y1, R1Step, x1 + 1)] +
+                 a10 * R1[mad24(4*rows + y1 + 1, R1Step, x1)] +
+                 a11 * R1[mad24(4*rows + y1 + 1, R1Step, x1 + 1)];
+
+            r4 = (R0[mad24(2*rows + y, R0Step, x)] + r4) * 0.5f;
+            r5 = (R0[mad24(3*rows + y, R0Step, x)] + r5) * 0.5f;
+            r6 = (R0[mad24(4*rows + y, R0Step, x)] + r6) * 0.25f;
+        }
+        else
+        {
+            r2 = r3 = 0.f;
+            r4 = R0[mad24(2*rows + y, R0Step, x)];
+            r5 = R0[mad24(3*rows + y, R0Step, x)];
+            r6 = R0[mad24(4*rows + y, R0Step, x)] * 0.5f;
+        }
+
+        r2 = (R0[mad24(y, R0Step, x)] - r2) * 0.5f;
+        r3 = (R0[mad24(rows + y, R0Step, x)] - r3) * 0.5f;
+
+        r2 += r4*dy + r6*dx;
+        r3 += r6*dy + r5*dx;
+
+        float scale =
+            c_border[min(x, BORDER_SIZE)] *
+            c_border[min(y, BORDER_SIZE)] *
+            c_border[min(cols - x - 1, BORDER_SIZE)] *
+            c_border[min(rows - y - 1, BORDER_SIZE)];
+
+        r2 *= scale;
+        r3 *= scale;
+        r4 *= scale;
+        r5 *= scale;
+        r6 *= scale;
+
+        M[mad24(y, mStep, x)] = r4*r4 + r6*r6;
+        M[mad24(rows + y, mStep, x)] = (r4 + r5)*r6;
+        M[mad24(2*rows + y, mStep, x)] = r5*r5 + r6*r6;
+        M[mad24(3*rows + y, mStep, x)] = r4*r2 + r6*r3;
+        M[mad24(4*rows + y, mStep, x)] = r6*r2 + r5*r3;
+    }
+}
+
+__kernel void boxFilter5(__global const float * src, int srcStep,
+                         __global float * dst, int dstStep,
+                         const int rows, const  int cols,
+                         const int ksizeHalf,
+                         __local float * smem)
+{
+    const int y = get_global_id(1);
+    const int x = get_global_id(0);
+
+    const float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
+    const int smw = bdx + 2*ksizeHalf; // shared memory "width"
+    __local float *row = smem + 5 * ty * smw;
+
+    if (y < rows)
+    {
+        // Vertical pass
+        for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
+        {
+            int xExt = (int)(bx * bdx) + i - ksizeHalf;
+            xExt = min(max(xExt, 0), cols - 1);
+
+#pragma unroll
+            for (int k = 0; k < 5; ++k)
+                row[k*smw + i] = src[mad24(k*rows + y, srcStep, xExt)];
+
+            for (int j = 1; j <= ksizeHalf; ++j)
+#pragma unroll
+                for (int k = 0; k < 5; ++k)
+                    row[k*smw + i] +=
+                        src[mad24(k*rows + max(y - j, 0), srcStep, xExt)] +
+                        src[mad24(k*rows + min(y + j, rows - 1), srcStep, xExt)];
+        }
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (y < rows && y >= 0 && x < cols && x >= 0)
+    {
+        // Horizontal pass
+
+        row += tx + ksizeHalf;
+        float res[5];
+
+#pragma unroll
+        for (int k = 0; k < 5; ++k)
+            res[k] = row[k*smw];
+
+        for (int i = 1; i <= ksizeHalf; ++i)
+#pragma unroll
+            for (int k = 0; k < 5; ++k)
+                res[k] += row[k*smw - i] + row[k*smw + i];
+
+#pragma unroll
+        for (int k = 0; k < 5; ++k)
+            dst[mad24(k*rows + y, dstStep, x)] = res[k] * boxAreaInv;
+    }
+}
+
+__kernel void updateFlow(__global const float * M, int mStep,
+                         __global float * flowx, int xStep,
+                         __global float * flowy, int yStep,
+                         const int rows, const int cols)
+{
+    const int y = get_global_id(1);
+    const int x = get_global_id(0);
+
+    if (y < rows && y >= 0 && x < cols && x >= 0)
+    {
+        float g11 = M[mad24(y, mStep, x)];
+        float g12 = M[mad24(rows + y, mStep, x)];
+        float g22 = M[mad24(2*rows + y, mStep, x)];
+        float h1 =  M[mad24(3*rows + y, mStep, x)];
+        float h2 =  M[mad24(4*rows + y, mStep, x)];
+
+        float detInv = 1.f / (g11*g22 - g12*g12 + 1e-3f);
+
+        flowx[mad24(y, xStep, x)] = (g11*h2 - g12*h1) * detInv;
+        flowy[mad24(y, yStep, x)] = (g22*h1 - g12*h2) * detInv;
+    }
+}
--- a/Lib/opencv/sources/modules/video/src/opencl/pyrlk.cl
+++ b/Lib/opencv/sources/modules/video/src/opencl/pyrlk.cl
@@ -0,0 +1,558 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Dachuan Zhao, dachuan@multicorewareinc.com
+//    Yao Wang, bitwangyaoyao@gmail.com
+//    Xiaopeng Fu, fuxiaopeng2222@163.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#define GRIDSIZE    3
+#define LSx 8
+#define LSy 8
+// define local memory sizes
+#define LM_W (LSx*GRIDSIZE+2)
+#define LM_H (LSy*GRIDSIZE+2)
+#define BUFFER  (LSx*LSy)
+#define BUFFER2 BUFFER>>1
+
+#ifdef CPU
+
+inline void reduce3(float val1, float val2, float val3,  __local float* smem1,  __local float* smem2,  __local float* smem3, int tid)
+{
+    smem1[tid] = val1;
+    smem2[tid] = val2;
+    smem3[tid] = val3;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    for(int i = BUFFER2; i > 0; i >>= 1)
+    {
+        if(tid < i)
+        {
+            smem1[tid] += smem1[tid + i];
+            smem2[tid] += smem2[tid + i];
+            smem3[tid] += smem3[tid + i];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+}
+
+inline void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid)
+{
+    smem1[tid] = val1;
+    smem2[tid] = val2;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    for(int i = BUFFER2; i > 0; i >>= 1)
+    {
+        if(tid < i)
+        {
+            smem1[tid] += smem1[tid + i];
+            smem2[tid] += smem2[tid + i];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+}
+
+inline void reduce1(float val1, __local float* smem1, int tid)
+{
+    smem1[tid] = val1;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    for(int i = BUFFER2; i > 0; i >>= 1)
+    {
+        if(tid < i)
+        {
+            smem1[tid] += smem1[tid + i];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+}
+#else
+inline void reduce3(float val1, float val2, float val3,
+             __local float* smem1, __local float* smem2, __local float* smem3, int tid)
+{
+    smem1[tid] = val1;
+    smem2[tid] = val2;
+    smem3[tid] = val3;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 32)
+    {
+        smem1[tid] += smem1[tid + 32];
+        smem2[tid] += smem2[tid + 32];
+        smem3[tid] += smem3[tid + 32];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16)
+    {
+        smem1[tid] += smem1[tid + 16];
+        smem2[tid] += smem2[tid + 16];
+        smem3[tid] += smem3[tid + 16];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 8)
+    {
+        smem1[tid] += smem1[tid + 8];
+        smem2[tid] += smem2[tid + 8];
+        smem3[tid] += smem3[tid + 8];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 4)
+    {
+        smem1[tid] += smem1[tid + 4];
+        smem2[tid] += smem2[tid + 4];
+        smem3[tid] += smem3[tid + 4];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid == 0)
+    {
+        smem1[0] = (smem1[0] + smem1[1]) + (smem1[2] + smem1[3]);
+        smem2[0] = (smem2[0] + smem2[1]) + (smem2[2] + smem2[3]);
+        smem3[0] = (smem3[0] + smem3[1]) + (smem3[2] + smem3[3]);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+}
+
+inline void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid)
+{
+    smem1[tid] = val1;
+    smem2[tid] = val2;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 32)
+    {
+        smem1[tid] += smem1[tid + 32];
+        smem2[tid] += smem2[tid + 32];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16)
+    {
+        smem1[tid] += smem1[tid + 16];
+        smem2[tid] += smem2[tid + 16];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 8)
+    {
+        smem1[tid] += smem1[tid + 8];
+        smem2[tid] += smem2[tid + 8];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 4)
+    {
+        smem1[tid] += smem1[tid + 4];
+        smem2[tid] += smem2[tid + 4];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid == 0)
+    {
+        smem1[0] = (smem1[0] + smem1[1]) + (smem1[2] + smem1[3]);
+        smem2[0] = (smem2[0] + smem2[1]) + (smem2[2] + smem2[3]);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+}
+
+inline void reduce1(float val1, __local float* smem1, int tid)
+{
+    smem1[tid] = val1;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 32)
+    {
+        smem1[tid] += smem1[tid + 32];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16)
+    {
+        smem1[tid] += smem1[tid + 16];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 8)
+    {
+        smem1[tid] += smem1[tid + 8];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 4)
+    {
+        smem1[tid] += smem1[tid + 4];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid == 0)
+    {
+        smem1[0] = (smem1[0] + smem1[1]) + (smem1[2] + smem1[3]);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+}
+#endif
+
+#define SCALE (1.0f / (1 << 20))
+#define	THRESHOLD	0.01f
+
+// Image read mode
+__constant sampler_t sampler    = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
+
+// macro to get pixel value from local memory
+
+#define VAL(_y,_x,_yy,_xx)    (IPatchLocal[mad24(((_y) + (_yy)), LM_W, ((_x) + (_xx)))])
+inline void SetPatch(local float* IPatchLocal, int TileY, int TileX,
+              float* Pch, float* Dx, float* Dy,
+              float* A11, float* A12, float* A22, float w)
+{
+    int xid=get_local_id(0);
+    int yid=get_local_id(1);
+    int xBase = mad24(TileX, LSx, (xid + 1));
+    int yBase = mad24(TileY, LSy, (yid + 1));
+
+    *Pch = VAL(yBase,xBase,0,0);
+
+    *Dx = mad((VAL(yBase,xBase,-1,1) + VAL(yBase,xBase,+1,1) - VAL(yBase,xBase,-1,-1) - VAL(yBase,xBase,+1,-1)), 3.0f, (VAL(yBase,xBase,0,1) - VAL(yBase,xBase,0,-1)) * 10.0f) * w;
+    *Dy = mad((VAL(yBase,xBase,1,-1) + VAL(yBase,xBase,1,+1) - VAL(yBase,xBase,-1,-1) - VAL(yBase,xBase,-1,+1)), 3.0f, (VAL(yBase,xBase,1,0) - VAL(yBase,xBase,-1,0)) * 10.0f) * w;
+
+    *A11 = mad(*Dx, *Dx, *A11);
+    *A12 = mad(*Dx, *Dy, *A12);
+    *A22 = mad(*Dy, *Dy, *A22);
+}
+#undef VAL
+
+inline void GetPatch(image2d_t J, float x, float y,
+              float* Pch, float* Dx, float* Dy,
+              float* b1, float* b2)
+{
+    float diff = read_imagef(J, sampler, (float2)(x,y)).x-*Pch;
+    *b1 = mad(diff, *Dx, *b1);
+    *b2 = mad(diff, *Dy, *b2);
+}
+
+inline void GetError(image2d_t J, const float x, const float y, const float* Pch, float* errval, float w)
+{
+    float diff = ((((read_imagef(J, sampler, (float2)(x,y)).x * 16384) + 256) / 512) - (((*Pch * 16384) + 256) /512)) * w;
+    *errval += fabs(diff);
+}
+
+
+//macro to read pixel value into local memory.
+#define READI(_y,_x) IPatchLocal[mad24(mad24((_y), LSy, yid), LM_W, mad24((_x), LSx, xid))] = read_imagef(I, sampler, (float2)(mad((float)(_x), (float)LSx, Point.x + xid - 0.5f), mad((float)(_y), (float)LSy, Point.y + yid - 0.5f))).x;
+void ReadPatchIToLocalMem(image2d_t I, float2 Point, local float* IPatchLocal)
+{
+    int xid=get_local_id(0);
+    int yid=get_local_id(1);
+    //read (3*LSx)*(3*LSy) window. each macro call read LSx*LSy pixels block
+    READI(0,0);READI(0,1);READI(0,2);
+    READI(1,0);READI(1,1);READI(1,2);
+    READI(2,0);READI(2,1);READI(2,2);
+    if(xid<2)
+    {// read last 2 columns border. each macro call reads 2*LSy pixels block
+        READI(0,3);
+        READI(1,3);
+        READI(2,3);
+    }
+
+    if(yid<2)
+    {// read last 2 row. each macro call reads LSx*2 pixels block
+        READI(3,0);READI(3,1);READI(3,2);
+    }
+
+    if(yid<2 && xid<2)
+    {// read right bottom 2x2 corner. one macro call reads 2*2 pixels block
+        READI(3,3);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+}
+#undef READI
+
+__attribute__((reqd_work_group_size(LSx, LSy, 1)))
+__kernel void lkSparse(image2d_t I, image2d_t J,
+                       __global const float2* prevPts, __global float2* nextPts, __global uchar* status, __global float* err,
+                       const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
+{
+    __local float smem1[BUFFER];
+    __local float smem2[BUFFER];
+    __local float smem3[BUFFER];
+
+    int xid=get_local_id(0);
+    int yid=get_local_id(1);
+    int gid=get_group_id(0);
+    int xsize=get_local_size(0);
+    int ysize=get_local_size(1);
+    int k;
+
+#ifdef CPU
+    float wx0 = 1.0f;
+    float wy0 = 1.0f;
+    int xBase = mad24(xsize, 2, xid);
+    int yBase = mad24(ysize, 2, yid);
+    float wx1 = (xBase < c_winSize_x) ? 1 : 0;
+    float wy1 = (yBase < c_winSize_y) ? 1 : 0;
+#else
+#if WSX == 1
+    float wx0 = 1.0f;
+    int xBase = mad24(xsize, 2, xid);
+    float wx1 = (xBase < c_winSize_x) ? 1 : 0;
+#else
+    int xBase = mad24(xsize, 1, xid);
+    float wx0 = (xBase < c_winSize_x) ? 1 : 0;
+    float wx1 = 0.0f;
+#endif
+#if WSY == 1
+    float wy0 = 1.0f;
+    int yBase = mad24(ysize, 2, yid);
+    float wy1 = (yBase < c_winSize_y) ? 1 : 0;
+#else
+    int yBase = mad24(ysize, 1, yid);
+    float wy0 = (yBase < c_winSize_y) ? 1 : 0;
+    float wy1 = 0.0f;
+#endif
+#endif
+
+    float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
+
+    const int tid = mad24(yid, xsize, xid);
+
+    float2 prevPt = prevPts[gid] / (float2)(1 << level);
+
+    if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
+    {
+        if (tid == 0 && level == 0)
+        {
+            status[gid] = 0;
+        }
+
+        return;
+    }
+    prevPt -= c_halfWin;
+
+    // extract the patch from the first image, compute covariation matrix of derivatives
+
+    float A11 = 0;
+    float A12 = 0;
+    float A22 = 0;
+
+    float I_patch[GRIDSIZE][GRIDSIZE];
+    float dIdx_patch[GRIDSIZE][GRIDSIZE];
+    float dIdy_patch[GRIDSIZE][GRIDSIZE];
+
+    // local memory to read image with border to calc sobels
+    local float IPatchLocal[LM_W*LM_H];
+    ReadPatchIToLocalMem(I,prevPt,IPatchLocal);
+
+    {
+        SetPatch(IPatchLocal, 0, 0,
+                 &I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
+                 &A11, &A12, &A22,1);
+
+
+        SetPatch(IPatchLocal, 0, 1,
+                 &I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
+                 &A11, &A12, &A22,wx0);
+
+        SetPatch(IPatchLocal, 0, 2,
+                    &I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
+                    &A11, &A12, &A22,wx1);
+    }
+    {
+        SetPatch(IPatchLocal, 1, 0,
+                 &I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
+                 &A11, &A12, &A22,wy0);
+
+
+        SetPatch(IPatchLocal, 1,1,
+                 &I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
+                 &A11, &A12, &A22,wx0*wy0);
+
+        SetPatch(IPatchLocal, 1,2,
+                    &I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
+                    &A11, &A12, &A22,wx1*wy0);
+    }
+    {
+        SetPatch(IPatchLocal, 2,0,
+                 &I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
+                 &A11, &A12, &A22,wy1);
+
+
+        SetPatch(IPatchLocal, 2,1,
+                 &I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
+                 &A11, &A12, &A22,wx0*wy1);
+
+        SetPatch(IPatchLocal, 2,2,
+                    &I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
+                    &A11, &A12, &A22,wx1*wy1);
+    }
+
+
+    reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
+
+    A11 = smem1[0];
+    A12 = smem2[0];
+    A22 = smem3[0];
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    float D = mad(A11, A22, - A12 * A12);
+
+    if (D < 1.192092896e-07f)
+    {
+        if (tid == 0 && level == 0)
+            status[gid] = 0;
+
+        return;
+    }
+
+    A11 /= D;
+    A12 /= D;
+    A22 /= D;
+
+    prevPt = mad(nextPts[gid], 2.0f, - c_halfWin);
+
+    float2 offset0 = (float2)(xid + 0.5f, yid + 0.5f);
+    float2 offset1 = (float2)(xsize, ysize);
+    float2 loc0 = prevPt + offset0;
+    float2 loc1 = loc0 + offset1;
+    float2 loc2 = loc1 + offset1;
+
+    for (k = 0; k < c_iters; ++k)
+    {
+        if (prevPt.x < -c_halfWin.x || prevPt.x >= cols || prevPt.y < -c_halfWin.y || prevPt.y >= rows)
+        {
+            if (tid == 0 && level == 0)
+                status[gid] = 0;
+            break;
+        }
+        float b1 = 0;
+        float b2 = 0;
+
+        {
+            GetPatch(J, loc0.x, loc0.y,
+                     &I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],
+                     &b1, &b2);
+
+
+            GetPatch(J, loc1.x, loc0.y,
+                     &I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
+                     &b1, &b2);
+
+            GetPatch(J, loc2.x, loc0.y,
+                        &I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
+                        &b1, &b2);
+        }
+        {
+            GetPatch(J, loc0.x, loc1.y,
+                     &I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
+                     &b1, &b2);
+
+
+            GetPatch(J, loc1.x, loc1.y,
+                     &I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
+                     &b1, &b2);
+
+            GetPatch(J, loc2.x, loc1.y,
+                        &I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
+                        &b1, &b2);
+        }
+        {
+            GetPatch(J, loc0.x, loc2.y,
+                     &I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
+                     &b1, &b2);
+
+
+            GetPatch(J, loc1.x, loc2.y,
+                     &I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
+                     &b1, &b2);
+
+            GetPatch(J, loc2.x, loc2.y,
+                        &I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
+                        &b1, &b2);
+        }
+
+        reduce2(b1, b2, smem1, smem2, tid);
+
+        b1 = smem1[0];
+        b2 = smem2[0];
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        float2 delta;
+        delta.x = mad(A12, b2, - A22 * b1) * 32.0f;
+        delta.y = mad(A12, b1, - A11 * b2) * 32.0f;
+
+        prevPt += delta;
+        loc0 += delta;
+        loc1 += delta;
+        loc2 += delta;
+
+        if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD)
+            break;
+    }
+
+    D = 0.0f;
+    if (calcErr)
+    {
+        {
+            GetError(J, loc0.x, loc0.y, &I_patch[0][0], &D, 1);
+            GetError(J, loc1.x, loc0.y, &I_patch[0][1], &D, wx0);
+        }
+        {
+            GetError(J, loc0.x, loc1.y, &I_patch[1][0], &D, wy0);
+            GetError(J, loc1.x, loc1.y, &I_patch[1][1], &D, wx0*wy0);
+        }
+        if(xBase < c_winSize_x)
+        {
+            GetError(J, loc2.x, loc0.y, &I_patch[0][2], &D, wx1);
+            GetError(J, loc2.x, loc1.y, &I_patch[1][2], &D, wx1*wy0);
+        }
+        if(yBase < c_winSize_y)
+        {
+            GetError(J, loc0.x, loc2.y, &I_patch[2][0], &D, wy1);
+            GetError(J, loc1.x, loc2.y, &I_patch[2][1], &D, wx0*wy1);
+            if(xBase < c_winSize_x)
+                GetError(J, loc2.x, loc2.y, &I_patch[2][2], &D, wx1*wy1);
+        }
+
+        reduce1(D, smem1, tid);
+    }
+
+    if (tid == 0)
+    {
+        prevPt += c_halfWin;
+
+        nextPts[gid] = prevPt;
+
+        if (calcErr)
+            err[gid] = smem1[0] / (float)(32 * c_winSize_x * c_winSize_y);
+    }
+}
--- a/Lib/opencv/sources/modules/video/src/optflowgf.cpp
+++ b/Lib/opencv/sources/modules/video/src/optflowgf.cpp
--- a/Lib/opencv/sources/modules/video/src/optical_flow_io.cpp
+++ b/Lib/opencv/sources/modules/video/src/optical_flow_io.cpp
@@ -0,0 +1,129 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include "precomp.hpp"
+#include<iostream>
+#include<fstream>
+
+namespace cv {
+
+const float FLOW_TAG_FLOAT = 202021.25f;
+const char *FLOW_TAG_STRING = "PIEH";
+CV_EXPORTS_W Mat readOpticalFlow( const String& path )
+{
+    Mat_<Point2f> flow;
+    std::ifstream file(path.c_str(), std::ios_base::binary);
+    if ( !file.good() )
+        return std::move(flow); // no file - return empty matrix
+
+    float tag;
+    file.read((char*) &tag, sizeof(float));
+    if ( tag != FLOW_TAG_FLOAT )
+        return std::move(flow);
+
+    int width, height;
+
+    file.read((char*) &width, 4);
+    file.read((char*) &height, 4);
+
+    flow.create(height, width);
+
+    for ( int i = 0; i < flow.rows; ++i )
+    {
+        for ( int j = 0; j < flow.cols; ++j )
+        {
+            Point2f u;
+            file.read((char*) &u.x, sizeof(float));
+            file.read((char*) &u.y, sizeof(float));
+            if ( !file.good() )
+            {
+                flow.release();
+                return std::move(flow);
+            }
+
+            flow(i, j) = u;
+        }
+    }
+    file.close();
+    return std::move(flow);
+}
+
+CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow )
+{
+    const int nChannels = 2;
+
+    Mat input = flow.getMat();
+    if ( input.channels() != nChannels || input.depth() != CV_32F || path.length() == 0 )
+        return false;
+
+    std::ofstream file(path.c_str(), std::ofstream::binary);
+    if ( !file.good() )
+        return false;
+
+    int nRows, nCols;
+
+    nRows = (int) input.size().height;
+    nCols = (int) input.size().width;
+
+    const int headerSize = 12;
+    char header[headerSize];
+    memcpy(header, FLOW_TAG_STRING, 4);
+    // size of ints is known - has been asserted in the current function
+    memcpy(header + 4, reinterpret_cast<const char*>(&nCols), sizeof(nCols));
+    memcpy(header + 8, reinterpret_cast<const char*>(&nRows), sizeof(nRows));
+    file.write(header, headerSize);
+    if ( !file.good() )
+        return false;
+
+    int row;
+    char* p;
+    for ( row = 0; row < nRows; row++ )
+    {
+        p = input.ptr<char>(row);
+        file.write(p, nCols * nChannels * sizeof(float));
+        if ( !file.good() )
+            return false;
+    }
+    file.close();
+    return true;
+}
+
+}
--- a/Lib/opencv/sources/modules/video/src/precomp.hpp
+++ b/Lib/opencv/sources/modules/video/src/precomp.hpp
@@ -0,0 +1,52 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include "opencv2/video.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/core/private.hpp"
+#include "opencv2/core/ocl.hpp"
+#include "opencv2/core.hpp"
+
+#endif
--- a/Lib/opencv/sources/modules/video/src/variational_refinement.cpp
+++ b/Lib/opencv/sources/modules/video/src/variational_refinement.cpp