add part of opencv

2020-01-27 20:20:56 +08:00
parent 0c4ac1d8bb
commit a71fa47620
6518 changed files with 3122580 additions and 0 deletions
--- a/Lib/opencv/sources/modules/objdetect/src/cascadedetect.cpp
+++ b/Lib/opencv/sources/modules/objdetect/src/cascadedetect.cpp
--- a/Lib/opencv/sources/modules/objdetect/src/cascadedetect.hpp
+++ b/Lib/opencv/sources/modules/objdetect/src/cascadedetect.hpp
@@ -0,0 +1,656 @@
+#pragma once
+
+#include "opencv2/core/ocl.hpp"
+
+namespace cv
+{
+
+// Declaration only; the definition is not part of this header.
+// NOTE(review): judging by the name, this presumably restricts `objects` to an
+// image of size `sz`, with `a` and `b` as optional per-object side arrays --
+// confirm against the definition.
+void clipObjects(Size sz, std::vector<Rect>& objects,
+                 std::vector<int>* a, std::vector<double>* b);
+
+// Base class of the feature evaluators declared in this header (HaarEvaluator
+// and LBPEvaluator derive from it). It declares the virtual interface used to
+// load features, bind an image / window and compute feature responses, and it
+// holds the per-scale bookkeeping shared by the subclasses.
+class FeatureEvaluator
+{
+public:
+    // Feature type identifiers, as returned by getFeatureType().
+    enum
+    {
+        HAAR = 0,
+        LBP  = 1,
+        HOG  = 2
+    };
+
+    // Per-scale record; all numeric fields start at zero.
+    struct ScaleData
+    {
+        ScaleData() { scale = 0.f; layer_ofs = ystep = 0; }
+        // Returns szi shrunk by winSize, clamped so neither dimension goes negative.
+        Size getWorkingSize(Size winSize) const
+        {
+            return Size(std::max(szi.width - winSize.width, 0),
+                        std::max(szi.height - winSize.height, 0));
+        }
+
+        float scale;
+        Size szi;
+        int layer_ofs, ystep;
+    };
+
+    virtual ~FeatureEvaluator();
+
+    virtual bool read(const FileNode& node, Size origWinSize);
+    virtual Ptr<FeatureEvaluator> clone() const;
+    virtual int getFeatureType() const;
+    int getNumChannels() const { return nchannels; }
+
+    virtual bool setImage(InputArray img, const std::vector<float>& scales);
+    virtual bool setWindow(Point p, int scaleIdx);
+    // Returns the record for scaleIdx; asserts that the index is within scaleData.
+    const ScaleData& getScaleData(int scaleIdx) const
+    {
+        CV_Assert( 0 <= scaleIdx && scaleIdx < (int)scaleData->size());
+        return scaleData->at(scaleIdx);
+    }
+    virtual void getUMats(std::vector<UMat>& bufs);
+    virtual void getMats();
+
+    Size getLocalSize() const { return localSize; }
+    Size getLocalBufSize() const { return lbufSize; }
+
+    // Feature response accessors; HaarEvaluator overrides calcOrd and
+    // LBPEvaluator overrides calcCat later in this header.
+    virtual float calcOrd(int featureIdx) const;
+    virtual int calcCat(int featureIdx) const;
+
+    static Ptr<FeatureEvaluator> create(int type);
+
+protected:
+    // Bit values for sbufFlag.
+    // NOTE(review): presumably they record which of sbuf / usbuf currently holds
+    // valid data -- confirm in the implementation file.
+    enum { SBUF_VALID=1, USBUF_VALID=2 };
+    int sbufFlag;
+
+    bool updateScaleData( Size imgsz, const std::vector<float>& _scales );
+    // Hooks for subclasses; the base versions do nothing.
+    virtual void computeChannels( int, InputArray ) {}
+    virtual void computeOptFeatures() {}
+
+    Size origWinSize, sbufSize, localSize, lbufSize;
+    int nchannels;
+    Mat sbuf, rbuf;
+    UMat urbuf, usbuf, ufbuf, uscaleData;
+
+    Ptr<std::vector<ScaleData> > scaleData;
+};
+
+
+// Concrete, final implementation of the BaseCascadeClassifier interface.
+// It owns the parsed cascade (the nested Data class), a feature evaluator and,
+// when HAVE_OPENCL is defined, the OpenCL kernels declared below.
+class CascadeClassifierImpl CV_FINAL : public BaseCascadeClassifier
+{
+public:
+    CascadeClassifierImpl();
+    virtual ~CascadeClassifierImpl() CV_OVERRIDE;
+
+    bool empty() const CV_OVERRIDE;
+    bool load( const String& filename ) CV_OVERRIDE;
+    void read( const FileNode& node ) CV_OVERRIDE;
+    bool read_( const FileNode& node );
+    // Three detectMultiScale overloads: objects only, objects plus detection
+    // counts, and objects plus reject levels / level weights.
+    void detectMultiScale( InputArray image,
+                          CV_OUT std::vector<Rect>& objects,
+                          double scaleFactor = 1.1,
+                          int minNeighbors = 3, int flags = 0,
+                          Size minSize = Size(),
+                          Size maxSize = Size() ) CV_OVERRIDE;
+
+    void detectMultiScale( InputArray image,
+                          CV_OUT std::vector<Rect>& objects,
+                          CV_OUT std::vector<int>& numDetections,
+                          double scaleFactor=1.1,
+                          int minNeighbors=3, int flags=0,
+                          Size minSize=Size(),
+                          Size maxSize=Size() ) CV_OVERRIDE;
+
+    void detectMultiScale( InputArray image,
+                          CV_OUT std::vector<Rect>& objects,
+                          CV_OUT std::vector<int>& rejectLevels,
+                          CV_OUT std::vector<double>& levelWeights,
+                          double scaleFactor = 1.1,
+                          int minNeighbors = 3, int flags = 0,
+                          Size minSize = Size(),
+                          Size maxSize = Size(),
+                          bool outputRejectLevels = false ) CV_OVERRIDE;
+
+
+    bool isOldFormatCascade() const CV_OVERRIDE;
+    Size getOriginalWindowSize() const CV_OVERRIDE;
+    int getFeatureType() const CV_OVERRIDE;
+    void* getOldCascade() CV_OVERRIDE;
+
+    void setMaskGenerator(const Ptr<MaskGenerator>& maskGenerator) CV_OVERRIDE;
+    Ptr<MaskGenerator> getMaskGenerator() CV_OVERRIDE;
+
+protected:
+    enum { SUM_ALIGN = 64 };
+
+    bool detectSingleScale( InputArray image, Size processingRectSize,
+                            int yStep, double factor, std::vector<Rect>& candidates,
+                            std::vector<int>& rejectLevels, std::vector<double>& levelWeights,
+                            Size sumSize0, bool outputRejectLevels = false );
+#ifdef HAVE_OPENCL
+    bool ocl_detectMultiScaleNoGrouping( const std::vector<float>& scales,
+                                         std::vector<Rect>& candidates );
+#endif
+    void detectMultiScaleNoGrouping( InputArray image, std::vector<Rect>& candidates,
+                                    std::vector<int>& rejectLevels, std::vector<double>& levelWeights,
+                                    double scaleFactor, Size minObjectSize, Size maxObjectSize,
+                                    bool outputRejectLevels = false );
+
+    enum { MAX_FACES = 10000 };
+    enum { BOOST = 0 };
+    // Local aliases for the CASCADE_* flag constants.
+    enum { DO_CANNY_PRUNING    = CASCADE_DO_CANNY_PRUNING,
+        SCALE_IMAGE         = CASCADE_SCALE_IMAGE,
+        FIND_BIGGEST_OBJECT = CASCADE_FIND_BIGGEST_OBJECT,
+        DO_ROUGH_SEARCH     = CASCADE_DO_ROUGH_SEARCH
+    };
+
+    friend class CascadeClassifierInvoker;
+    friend class SparseCascadeClassifierInvoker;
+
+    // The predict* templates defined later in this header read the protected
+    // `data` member directly, hence the friend declarations.
+    template<class FEval>
+    friend int predictOrdered( CascadeClassifierImpl& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
+
+    template<class FEval>
+    friend int predictCategorical( CascadeClassifierImpl& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
+
+    template<class FEval>
+    friend int predictOrderedStump( CascadeClassifierImpl& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
+
+    template<class FEval>
+    friend int predictCategoricalStump( CascadeClassifierImpl& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
+
+    int runAt( Ptr<FeatureEvaluator>& feval, Point pt, int scaleIdx, double& weight );
+
+    // Cascade model stored as flat arrays: stages index into classifiers
+    // (trees), whose nodes, leaves and category subsets are laid out
+    // consecutively; `stumps` holds single-node trees in compact form.
+    class Data
+    {
+    public:
+        struct DTreeNode
+        {
+            int featureIdx;
+            float threshold; // for ordered features only
+            int left;
+            int right;
+        };
+
+        struct DTree
+        {
+            int nodeCount;
+        };
+
+        // `first` is the index of the stage's first tree in `classifiers`
+        // (see its use in predictOrdered); `ntrees` is the tree count.
+        struct Stage
+        {
+            int first;
+            int ntrees;
+            float threshold;
+        };
+
+        // Single-split tree: `left` / `right` are the values added to the
+        // stage sum (see predictOrderedStump), not child indices.
+        struct Stump
+        {
+            Stump() : featureIdx(0), threshold(0), left(0), right(0) { }
+            Stump(int _featureIdx, float _threshold, float _left, float _right)
+            : featureIdx(_featureIdx), threshold(_threshold), left(_left), right(_right) {}
+
+            int featureIdx;
+            float threshold;
+            float left;
+            float right;
+        };
+
+        Data();
+
+        bool read(const FileNode &node);
+
+        int stageType;
+        int featureType;
+        int ncategories;
+        int minNodesPerTree, maxNodesPerTree;
+        Size origWinSize;
+
+        std::vector<Stage> stages;
+        std::vector<DTree> classifiers;
+        std::vector<DTreeNode> nodes;
+        std::vector<float> leaves;
+        std::vector<int> subsets;
+        std::vector<Stump> stumps;
+    };
+
+    Data data;
+    Ptr<FeatureEvaluator> featureEvaluator;
+    Ptr<CvHaarClassifierCascade> oldCascade;
+
+    Ptr<MaskGenerator> maskGenerator;
+    UMat ugrayImage;
+    UMat ufacepos, ustages, unodes, uleaves, usubsets;
+#ifdef HAVE_OPENCL
+    ocl::Kernel haarKernel, lbpKernel;
+    bool tryOpenCL;
+#endif
+
+    Mutex mtx;
+};
+
+// String keys for the nodes of a stored cascade description.
+// NOTE(review): presumably consumed by the cascade reader in the matching .cpp
+// file, which is not visible from this header -- confirm there.
+#define CC_CASCADE_PARAMS "cascadeParams"
+#define CC_STAGE_TYPE     "stageType"
+#define CC_FEATURE_TYPE   "featureType"
+#define CC_HEIGHT         "height"
+#define CC_WIDTH          "width"
+
+#define CC_STAGE_NUM    "stageNum"
+#define CC_STAGES       "stages"
+#define CC_STAGE_PARAMS "stageParams"
+
+#define CC_BOOST            "BOOST"
+#define CC_MAX_DEPTH        "maxDepth"
+#define CC_WEAK_COUNT       "maxWeakCount"
+#define CC_STAGE_THRESHOLD  "stageThreshold"
+#define CC_WEAK_CLASSIFIERS "weakClassifiers"
+#define CC_INTERNAL_NODES   "internalNodes"
+#define CC_LEAF_VALUES      "leafValues"
+
+#define CC_FEATURES       "features"
+#define CC_FEATURE_PARAMS "featureParams"
+#define CC_MAX_CAT_COUNT  "maxCatCount"
+
+#define CC_HAAR   "HAAR"
+#define CC_RECTS  "rects"
+#define CC_TILTED "tilted"
+
+#define CC_LBP  "LBP"
+#define CC_RECT "rect"
+
+#define CC_HOG  "HOG"
+
+// Assigns p0..p3 the four corners of `rect` relative to the base `sum`, using
+// `step` as the row stride. Expands to a single comma expression with no
+// trailing semicolon. `sum` is not parenthesised in the expansion, so pass a
+// simple expression.
+#define CV_SUM_PTRS( p0, p1, p2, p3, sum, rect, step )                    \
+    /* (x, y) */                                                          \
+    (p0) = sum + (rect).x + (step) * (rect).y,                            \
+    /* (x + w, y) */                                                      \
+    (p1) = sum + (rect).x + (rect).width + (step) * (rect).y,             \
+    /* (x, y + h) */                                                      \
+    (p2) = sum + (rect).x + (step) * ((rect).y + (rect).height),          \
+    /* (x + w, y + h) */                                                  \
+    (p3) = sum + (rect).x + (rect).width + (step) * ((rect).y + (rect).height)
+
+// Tilted counterpart of CV_SUM_PTRS: assigns p0..p3 the four corner positions
+// annotated inline, relative to the base `tilted` with row stride `step`.
+// Expands to a single comma expression; `tilted` is not parenthesised.
+#define CV_TILTED_PTRS( p0, p1, p2, p3, tilted, rect, step )                        \
+    /* (x, y) */                                                                    \
+    (p0) = tilted + (rect).x + (step) * (rect).y,                                   \
+    /* (x - h, y + h) */                                                            \
+    (p1) = tilted + (rect).x - (rect).height + (step) * ((rect).y + (rect).height), \
+    /* (x + w, y + w) */                                                            \
+    (p2) = tilted + (rect).x + (rect).width + (step) * ((rect).y + (rect).width),   \
+    /* (x + w - h, y + w + h) */                                                    \
+    (p3) = tilted + (rect).x + (rect).width - (rect).height                         \
+           + (step) * ((rect).y + (rect).width + (rect).height)
+
+// Four-corner combination p0 - p1 - p2 + p3, with each corner pointer indexed
+// by `offset`.
+#define CALC_SUM_(p0, p1, p2, p3, offset) \
+    ((p0)[offset] - (p1)[offset] - (p2)[offset] + (p3)[offset])
+
+// Same as CALC_SUM_, taking the four corner pointers from rect[0..3].
+#define CALC_SUM(rect,offset) CALC_SUM_((rect)[0], (rect)[1], (rect)[2], (rect)[3], offset)
+
+// Same expansion as CV_SUM_PTRS under a different name: assigns p0..p3 the
+// four corners of `rect` relative to `sum` with row stride `step`.
+// NOTE(review): the _OFS name suggests it is meant for integer offsets rather
+// than pointers -- confirm at the call sites.
+#define CV_SUM_OFS( p0, p1, p2, p3, sum, rect, step )                 \
+/* (x, y) */                                                          \
+(p0) = sum + (rect).x + (step) * (rect).y,                            \
+/* (x + w, y) */                                                      \
+(p1) = sum + (rect).x + (rect).width + (step) * (rect).y,             \
+/* (x, y + h) */                                                      \
+(p2) = sum + (rect).x + (step) * ((rect).y + (rect).height),          \
+/* (x + w, y + h) */                                                  \
+(p3) = sum + (rect).x + (rect).width + (step) * ((rect).y + (rect).height)
+
+// Tilted counterpart of CV_SUM_OFS: assigns p0..p3 the four corner positions
+// annotated inline, relative to the base `tilted` with row stride `step`.
+// Expands to a single comma expression with no trailing semicolon; `tilted`
+// is not parenthesised in the expansion, so pass a simple expression.
+#define CV_TILTED_OFS( p0, p1, p2, p3, tilted, rect, step )                     \
+/* (x, y) */                                                                    \
+(p0) = tilted + (rect).x + (step) * (rect).y,                                   \
+/* (x - h, y + h) */                                                            \
+(p1) = tilted + (rect).x - (rect).height + (step) * ((rect).y + (rect).height), \
+/* (x + w, y + w) */                                                            \
+(p2) = tilted + (rect).x + (rect).width + (step) * ((rect).y + (rect).width),   \
+/* (x + w - h, y + w + h) */                                                    \
+(p3) = tilted + (rect).x + (rect).width - (rect).height                         \
+       + (step) * ((rect).y + (rect).width + (rect).height)
+
+// Four-corner combination ptr[p0] - ptr[p1] - ptr[p2] + ptr[p3], where p0..p3
+// are offsets into `ptr`.
+#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
+((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
+
+// Same as CALC_SUM_OFS_, taking the four offsets from rect[0..3].
+#define CALC_SUM_OFS(rect, ptr) CALC_SUM_OFS_((rect)[0], (rect)[1], (rect)[2], (rect)[3], ptr)
+
+//----------------------------------------------  HaarEvaluator ---------------------------------------
+// FeatureEvaluator subclass for Haar features (getFeatureType() returns HAAR).
+// A feature response is OptFeature::calc() on the current window pointer
+// multiplied by varianceNormFactor (see operator()).
+class HaarEvaluator CV_FINAL : public FeatureEvaluator
+{
+public:
+    // Feature as described in the cascade: up to RECT_NUM weighted rectangles
+    // plus a `tilted` flag.
+    struct Feature
+    {
+        Feature();
+        bool read(const FileNode& node, const Size& origWinSize);
+
+        bool tilted;
+
+        enum { RECT_NUM = 3 };
+        struct RectWeigth
+        {
+            Rect r;
+            float weight;
+        } rect[RECT_NUM];
+    };
+
+    // Evaluation-ready form of a Feature: four offsets per rectangle plus the
+    // rectangle weights. `weight` has 4 slots although calc() reads only the
+    // first three.
+    struct OptFeature
+    {
+        OptFeature();
+
+        enum { RECT_NUM = Feature::RECT_NUM };
+        float calc( const int* pwin ) const;
+        void setOffsets( const Feature& _f, int step, int tofs );
+
+        int ofs[RECT_NUM][4];
+        float weight[4];
+    };
+
+    HaarEvaluator();
+    virtual ~HaarEvaluator() CV_OVERRIDE;
+
+    virtual bool read( const FileNode& node, Size origWinSize) CV_OVERRIDE;
+    virtual Ptr<FeatureEvaluator> clone() const CV_OVERRIDE;
+    virtual int getFeatureType() const CV_OVERRIDE { return FeatureEvaluator::HAAR; }
+
+    virtual bool setWindow(Point p, int scaleIdx) CV_OVERRIDE;
+    Rect getNormRect() const;
+    int getSquaresOffset() const;
+
+    // Response of feature `featureIdx` for the current window, scaled by
+    // varianceNormFactor. No bounds check is performed on featureIdx.
+    float operator()(int featureIdx) const
+    { return optfeaturesPtr[featureIdx].calc(pwin) * varianceNormFactor; }
+    virtual float calcOrd(int featureIdx) const CV_OVERRIDE
+    { return (*this)(featureIdx); }
+
+protected:
+    virtual void computeChannels( int i, InputArray img ) CV_OVERRIDE;
+    virtual void computeOptFeatures() CV_OVERRIDE;
+
+    Ptr<std::vector<Feature> > features;
+    Ptr<std::vector<OptFeature> > optfeatures;
+    Ptr<std::vector<OptFeature> > optfeatures_lbuf;
+    bool hasTiltedFeatures;
+
+    int tofs, sqofs;
+    Vec4i nofs;
+    Rect normrect;
+    const int* pwin;
+    OptFeature* optfeaturesPtr; // optimization
+    float varianceNormFactor;
+};
+
+// Default state: not tilted, every rectangle empty with zero weight.
+inline HaarEvaluator::Feature :: Feature()
+{
+    for( int k = 0; k < RECT_NUM; k++ )
+    {
+        rect[k].r = Rect();
+        rect[k].weight = 0;
+    }
+    tilted = false;
+}
+
+// Zero-initialises every offset and every weight slot.
+inline HaarEvaluator::OptFeature :: OptFeature()
+{
+    // `weight` is declared with 4 elements; clear all of them so that no slot
+    // is left indeterminate when the object is copied.
+    const int nweights = (int)(sizeof(weight)/sizeof(weight[0]));
+    for( int i = 0; i < nweights; i++ )
+        weight[i] = 0.f;
+
+    for( int i = 0; i < RECT_NUM; i++ )
+        for( int j = 0; j < 4; j++ )
+            ofs[i][j] = 0;
+}
+
+// Weighted sum of the rectangle sums read through `ptr`. The first two
+// rectangles always contribute; the third only when its weight is non-zero.
+inline float HaarEvaluator::OptFeature :: calc( const int* ptr ) const
+{
+    const float firstTwo = weight[0] * CALC_SUM_OFS(ofs[0], ptr) +
+                           weight[1] * CALC_SUM_OFS(ofs[1], ptr);
+
+    if( weight[2] == 0.0f )
+        return firstTwo;
+
+    return firstTwo + weight[2] * CALC_SUM_OFS(ofs[2], ptr);
+}
+
+//----------------------------------------------  LBPEvaluator -------------------------------------
+
+// FeatureEvaluator subclass for LBP features (getFeatureType() returns LBP).
+// A feature response is the integer code produced by OptFeature::calc() on
+// the current window pointer.
+class LBPEvaluator CV_FINAL : public FeatureEvaluator
+{
+public:
+    // Feature as described in the cascade: a single block rectangle.
+    struct Feature
+    {
+        Feature();
+        Feature( int x, int y, int _block_w, int _block_h  ) :
+                 rect(x, y, _block_w, _block_h) {}
+
+        bool read(const FileNode& node, const Size& origWinSize);
+
+        Rect rect; // block position (x, y) with block width and height
+    };
+
+    // Evaluation-ready form of a Feature: 16 offsets consumed by calc().
+    struct OptFeature
+    {
+        OptFeature();
+
+        int calc( const int* pwin ) const;
+        void setOffsets( const Feature& _f, int step );
+        int ofs[16];
+    };
+
+    LBPEvaluator();
+    virtual ~LBPEvaluator() CV_OVERRIDE;
+
+    virtual bool read( const FileNode& node, Size origWinSize ) CV_OVERRIDE;
+    virtual Ptr<FeatureEvaluator> clone() const CV_OVERRIDE;
+    virtual int getFeatureType() const CV_OVERRIDE { return FeatureEvaluator::LBP; }
+
+    virtual bool setWindow(Point p, int scaleIdx) CV_OVERRIDE;
+
+    // Code of feature `featureIdx` for the current window. No bounds check is
+    // performed on featureIdx.
+    int operator()(int featureIdx) const
+    { return optfeaturesPtr[featureIdx].calc(pwin); }
+    virtual int calcCat(int featureIdx) const CV_OVERRIDE
+    { return (*this)(featureIdx); }
+protected:
+    virtual void computeChannels( int i, InputArray img ) CV_OVERRIDE;
+    virtual void computeOptFeatures() CV_OVERRIDE;
+
+    Ptr<std::vector<Feature> > features;
+    Ptr<std::vector<OptFeature> > optfeatures;
+    Ptr<std::vector<OptFeature> > optfeatures_lbuf;
+    OptFeature* optfeaturesPtr; // optimization
+
+    const int* pwin;
+};
+
+
+// Default state: an empty block rectangle.
+inline LBPEvaluator::Feature :: Feature()
+    : rect()
+{
+}
+
+// Clears all 16 offsets.
+inline LBPEvaluator::OptFeature :: OptFeature()
+{
+    int k = 16;
+    while( k-- > 0 )
+        ofs[k] = 0;
+}
+
+// Builds the 8-bit code for this feature: each of the eight surrounding cell
+// sums is compared against the reference sum taken over ofs[5], ofs[6],
+// ofs[9], ofs[10], and sets one bit when it is greater than or equal to it.
+inline int LBPEvaluator::OptFeature :: calc( const int* p ) const
+{
+    const int reference = CALC_SUM_OFS_( ofs[5], ofs[6], ofs[9], ofs[10], p );
+    int code = 0;
+
+    if( CALC_SUM_OFS_( ofs[0], ofs[1], ofs[4], ofs[5], p ) >= reference )     // 0
+        code |= 128;
+    if( CALC_SUM_OFS_( ofs[1], ofs[2], ofs[5], ofs[6], p ) >= reference )     // 1
+        code |= 64;
+    if( CALC_SUM_OFS_( ofs[2], ofs[3], ofs[6], ofs[7], p ) >= reference )     // 2
+        code |= 32;
+    if( CALC_SUM_OFS_( ofs[6], ofs[7], ofs[10], ofs[11], p ) >= reference )   // 5
+        code |= 16;
+    if( CALC_SUM_OFS_( ofs[10], ofs[11], ofs[14], ofs[15], p ) >= reference ) // 8
+        code |= 8;
+    if( CALC_SUM_OFS_( ofs[9], ofs[10], ofs[13], ofs[14], p ) >= reference )  // 7
+        code |= 4;
+    if( CALC_SUM_OFS_( ofs[8], ofs[9], ofs[12], ofs[13], p ) >= reference )   // 6
+        code |= 2;
+    if( CALC_SUM_OFS_( ofs[4], ofs[5], ofs[8], ofs[9], p ) >= reference )
+        code |= 1;
+
+    return code;
+}
+
+
+//----------------------------------------------  predictor functions -------------------------------------
+
+// Runs the cascade stage by stage using ordered (threshold) splits on general
+// decision trees. `sum` is reset at the start of every stage and accumulates
+// the leaf values reached in that stage.
+// Returns 1 when every stage passes, otherwise minus the index of the first
+// stage whose sum falls below its threshold (0 for the very first stage).
+template<class FEval>
+inline int predictOrdered( CascadeClassifierImpl& cascade,
+                           Ptr<FeatureEvaluator> &_featureEvaluator, double& sum )
+{
+    CV_INSTRUMENT_REGION();
+
+    typedef CascadeClassifierImpl::Data::Stage Stage;
+    typedef CascadeClassifierImpl::Data::DTree DTree;
+    typedef CascadeClassifierImpl::Data::DTreeNode DTreeNode;
+
+    const int stageCount = (int)cascade.data.stages.size();
+    FEval& evaluator = (FEval&)*_featureEvaluator;
+    Stage* stageTable = &cascade.data.stages[0];
+    DTree* treeTable = &cascade.data.classifiers[0];
+    DTreeNode* nodeTable = &cascade.data.nodes[0];
+    float* leafTable = &cascade.data.leaves[0];
+
+    // Running positions of the current tree inside the flat node / leaf arrays.
+    int nodeBase = 0, leafBase = 0;
+
+    for( int stageIdx = 0; stageIdx < stageCount; stageIdx++ )
+    {
+        Stage& stage = stageTable[stageIdx];
+        const int treeCount = stage.ntrees;
+        sum = 0;
+
+        for( int treeIdx = 0; treeIdx < treeCount; treeIdx++ )
+        {
+            DTree& tree = treeTable[stage.first + treeIdx];
+            int cur = 0;
+
+            // Descend until a non-positive index is reached; its negation
+            // selects the leaf of this tree.
+            for( ;; )
+            {
+                DTreeNode& node = nodeTable[nodeBase + cur];
+                double response = evaluator(node.featureIdx);
+                if( response < node.threshold )
+                    cur = node.left;
+                else
+                    cur = node.right;
+                if( cur <= 0 )
+                    break;
+            }
+
+            sum += leafTable[leafBase - cur];
+            nodeBase += tree.nodeCount;
+            leafBase += tree.nodeCount + 1;
+        }
+
+        if( sum < stage.threshold )
+            return -stageIdx;
+    }
+    return 1;
+}
+
+// Runs the cascade stage by stage using categorical splits on general decision
+// trees: the feature value selects a bit in the node's subset mask, and that
+// bit chooses the left or right branch. `sum` is reset at the start of every
+// stage and accumulates the leaf values reached in that stage.
+// Returns 1 when every stage passes, otherwise minus the index of the first
+// stage whose sum falls below its threshold (0 for the very first stage).
+template<class FEval>
+inline int predictCategorical( CascadeClassifierImpl& cascade,
+                               Ptr<FeatureEvaluator> &_featureEvaluator, double& sum )
+{
+    CV_INSTRUMENT_REGION();
+
+    typedef CascadeClassifierImpl::Data::Stage Stage;
+    typedef CascadeClassifierImpl::Data::DTree DTree;
+    typedef CascadeClassifierImpl::Data::DTreeNode DTreeNode;
+
+    const int stageCount = (int)cascade.data.stages.size();
+    FEval& evaluator = (FEval&)*_featureEvaluator;
+    // Number of 32-bit words in one node's subset mask.
+    size_t subsetSize = (cascade.data.ncategories + 31)/32;
+    int* subsetTable = &cascade.data.subsets[0];
+    float* leafTable = &cascade.data.leaves[0];
+    DTreeNode* nodeTable = &cascade.data.nodes[0];
+    DTree* treeTable = &cascade.data.classifiers[0];
+    Stage* stageTable = &cascade.data.stages[0];
+
+    // Running positions of the current tree inside the flat node / leaf arrays.
+    int nodeBase = 0, leafBase = 0;
+
+    for( int stageIdx = 0; stageIdx < stageCount; stageIdx++ )
+    {
+        Stage& stage = stageTable[stageIdx];
+        const int treeCount = stage.ntrees;
+        sum = 0;
+
+        for( int treeIdx = 0; treeIdx < treeCount; treeIdx++ )
+        {
+            DTree& tree = treeTable[stage.first + treeIdx];
+            int cur = 0;
+
+            // Descend until a non-positive index is reached; its negation
+            // selects the leaf of this tree.
+            for( ;; )
+            {
+                DTreeNode& node = nodeTable[nodeBase + cur];
+                int category = evaluator(node.featureIdx);
+                const int* mask = &subsetTable[(nodeBase + cur)*subsetSize];
+                if( mask[category>>5] & (1 << (category & 31)) )
+                    cur = node.left;
+                else
+                    cur = node.right;
+                if( cur <= 0 )
+                    break;
+            }
+
+            sum += leafTable[leafBase - cur];
+            nodeBase += tree.nodeCount;
+            leafBase += tree.nodeCount + 1;
+        }
+
+        if( sum < stage.threshold )
+            return -stageIdx;
+    }
+    return 1;
+}
+
+// Runs the cascade stage by stage using ordered (threshold) stumps. Asserts
+// that the cascade has stumps. On return `sum` holds the total of the last
+// stage evaluated (0 when there are no stages).
+// Returns 1 when every stage passes, otherwise minus the index of the first
+// stage whose total falls below its threshold (0 for the very first stage).
+template<class FEval>
+inline int predictOrderedStump( CascadeClassifierImpl& cascade,
+                                Ptr<FeatureEvaluator> &_featureEvaluator, double& sum )
+{
+    CV_INSTRUMENT_REGION();
+
+    typedef CascadeClassifierImpl::Data::Stump Stump;
+    typedef CascadeClassifierImpl::Data::Stage Stage;
+
+    CV_Assert(!cascade.data.stumps.empty());
+    FEval& evaluator = (FEval&)*_featureEvaluator;
+    const Stump* stumpTable = &cascade.data.stumps[0];
+    const Stage* stageTable = &cascade.data.stages[0];
+    const int stageCount = (int)cascade.data.stages.size();
+
+    double stageTotal = 0;
+    // Index of the current stage's first stump inside the flat stump array.
+    int stumpBase = 0;
+
+    for( int s = 0; s < stageCount; s++ )
+    {
+        const Stage& stage = stageTable[s];
+        const int stumpCount = stage.ntrees;
+        stageTotal = 0;
+
+        for( int t = 0; t < stumpCount; t++ )
+        {
+            const Stump& stump = stumpTable[stumpBase + t];
+            double response = evaluator(stump.featureIdx);
+            stageTotal += response < stump.threshold ? stump.left : stump.right;
+        }
+
+        if( stageTotal < stage.threshold )
+        {
+            sum = stageTotal;
+            return -s;
+        }
+        stumpBase += stumpCount;
+    }
+
+    sum = stageTotal;
+    return 1;
+}
+
+// Runs the cascade stage by stage using categorical stumps: the feature value
+// selects a bit in the stump's subset mask, and that bit chooses the left or
+// right value. Asserts that the cascade has stumps. On return `sum` holds the
+// total of the last stage evaluated (0 when there are no stages).
+// Returns 1 when every stage passes, otherwise minus the index of the first
+// stage whose total falls below its threshold (0 for the very first stage).
+template<class FEval>
+inline int predictCategoricalStump( CascadeClassifierImpl& cascade,
+                                    Ptr<FeatureEvaluator> &_featureEvaluator, double& sum )
+{
+    CV_INSTRUMENT_REGION();
+
+    typedef CascadeClassifierImpl::Data::Stump Stump;
+    typedef CascadeClassifierImpl::Data::Stage Stage;
+
+    CV_Assert(!cascade.data.stumps.empty());
+    const int stageCount = (int)cascade.data.stages.size();
+    FEval& evaluator = (FEval&)*_featureEvaluator;
+    // Number of 32-bit words in one stump's subset mask.
+    size_t subsetSize = (cascade.data.ncategories + 31)/32;
+    const int* stageSubsets = &cascade.data.subsets[0];
+    const Stump* stageStumps = &cascade.data.stumps[0];
+    const Stage* stageTable = &cascade.data.stages[0];
+
+    double stageTotal = 0;
+
+    for( int s = 0; s < stageCount; s++ )
+    {
+        const Stage& stage = stageTable[s];
+        const int stumpCount = stage.ntrees;
+        stageTotal = 0;
+
+        for( int t = 0; t < stumpCount; t++ )
+        {
+            const Stump& stump = stageStumps[t];
+            int category = evaluator(stump.featureIdx);
+            const int* mask = &stageSubsets[t*subsetSize];
+            stageTotal += (mask[category>>5] & (1 << (category & 31))) ? stump.left : stump.right;
+        }
+
+        if( stageTotal < stage.threshold )
+        {
+            sum = stageTotal;
+            return -s;
+        }
+
+        // Advance both cursors past the stumps and masks of this stage.
+        stageStumps += stumpCount;
+        stageSubsets += stumpCount*subsetSize;
+    }
+
+    sum = (double)stageTotal;
+    return 1;
+}
+
+namespace haar_cvt
+{
+// Declaration only; takes the root node of an old cascade and a FileStorage
+// to write to, and reports success as a bool.
+// NOTE(review): presumably defined in cascadedetect_convert.cpp -- confirm.
+bool convert(const FileNode& oldcascade_root, FileStorage& newfs);
+}
+
+}
--- a/Lib/opencv/sources/modules/objdetect/src/cascadedetect_convert.cpp
+++ b/Lib/opencv/sources/modules/objdetect/src/cascadedetect_convert.cpp
@@ -0,0 +1,273 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, Itseez Inc, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+/* Haar features calculation */
+
+#include "precomp.hpp"
+#include "cascadedetect.hpp"
+#include <stdio.h>
+
+namespace cv
+{
+
+/* Node names of the old-format Haar cascade file. haar_cvt::convert() in this
+   file looks up a subset of them on the input tree. */
+
+#define ICV_HAAR_SIZE_NAME            "size"
+#define ICV_HAAR_STAGES_NAME          "stages"
+#define ICV_HAAR_TREES_NAME           "trees"
+#define ICV_HAAR_FEATURE_NAME         "feature"
+#define ICV_HAAR_RECTS_NAME           "rects"
+#define ICV_HAAR_TILTED_NAME          "tilted"
+#define ICV_HAAR_THRESHOLD_NAME       "threshold"
+#define ICV_HAAR_LEFT_NODE_NAME       "left_node"
+#define ICV_HAAR_LEFT_VAL_NAME        "left_val"
+#define ICV_HAAR_RIGHT_NODE_NAME      "right_node"
+#define ICV_HAAR_RIGHT_VAL_NAME       "right_val"
+#define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold"
+#define ICV_HAAR_PARENT_NAME          "parent"
+#define ICV_HAAR_NEXT_NAME            "next"
+
+namespace haar_cvt
+{
+
+// One Haar feature as read from the old format: a `tilted` flag and up to
+// RECT_NUM weighted rectangles.
+struct HaarFeature
+{
+    enum { RECT_NUM = 3 };
+
+    // Starts not tilted, with every rectangle empty and weightless.
+    HaarFeature() : tilted(false)
+    {
+        for( int idx = 0; idx < RECT_NUM; ++idx )
+        {
+            rect[idx].weight = 0.f;
+            rect[idx].r = Rect(0,0,0,0);
+        }
+    }
+    bool tilted;
+    struct
+    {
+        Rect r;
+        float weight;
+    } rect[RECT_NUM];
+};
+
+// One tree node: feature index `f`, split `threshold` and the `left` / `right`
+// links filled in by convert(). All fields start at zero.
+struct HaarClassifierNode
+{
+    HaarClassifierNode()
+        : f(0), left(0), right(0), threshold(0.f)
+    {
+    }
+    int f, left, right;
+    float threshold;
+};
+
+// One weak classifier (tree): its nodes plus the leaf values they refer to.
+// convert() stores a leaf reference in a node as minus the leaf's index.
+struct HaarClassifier
+{
+    std::vector<HaarClassifierNode> nodes;
+    std::vector<float> leaves;
+};
+
+// One cascade stage: its threshold and the weak classifiers it contains.
+struct HaarStageClassifier
+{
+    double threshold;
+    std::vector<HaarClassifier> weaks;
+};
+
+// Converts the old-format Haar cascade rooted at `oldroot` and streams the
+// result to `newfs`.
+//
+// The input is parsed completely before anything is written, so a `false`
+// return leaves `newfs` untouched. Returns false when the "size" node is
+// missing or when a feature lists more rectangles than HaarFeature::RECT_NUM;
+// returns true once the whole cascade has been written.
+bool convert(const FileNode& oldroot, FileStorage& newfs)
+{
+    FileNode sznode = oldroot[ICV_HAAR_SIZE_NAME];
+    if( sznode.empty() )
+        return false;
+    Size cascadesize;
+    cascadesize.width = (int)sznode[0];
+    cascadesize.height = (int)sznode[1];
+    std::vector<HaarFeature> features;
+
+    int i, j, k, n;
+
+    FileNode stages_seq = oldroot[ICV_HAAR_STAGES_NAME];
+    int nstages = (int)stages_seq.size();
+    std::vector<HaarStageClassifier> stages(nstages);
+
+    // Pass 1: read stages -> weak classifiers -> nodes, collecting every
+    // node's feature into the flat `features` list.
+    for( i = 0; i < nstages; i++ )
+    {
+        FileNode stagenode = stages_seq[i];
+        HaarStageClassifier& stage = stages[i];
+        stage.threshold = (double)stagenode[ICV_HAAR_STAGE_THRESHOLD_NAME];
+        FileNode weaks_seq = stagenode[ICV_HAAR_TREES_NAME];
+        int nweaks = (int)weaks_seq.size();
+        stage.weaks.resize(nweaks);
+
+        for( j = 0; j < nweaks; j++ )
+        {
+            HaarClassifier& weak = stage.weaks[j];
+            FileNode weaknode = weaks_seq[j];
+            int nnodes = (int)weaknode.size();
+
+            for( n = 0; n < nnodes; n++ )
+            {
+                FileNode nnode = weaknode[n];
+                FileNode fnode = nnode[ICV_HAAR_FEATURE_NAME];
+                HaarFeature f;
+                HaarClassifierNode node;
+                // The node refers to its feature by position in `features`.
+                node.f = (int)features.size();
+                f.tilted = (int)fnode[ICV_HAAR_TILTED_NAME] != 0;
+                FileNode rects_seq = fnode[ICV_HAAR_RECTS_NAME];
+                int nrects = (int)rects_seq.size();
+
+                // HaarFeature holds at most RECT_NUM rectangles; reject input
+                // that would write past the end of f.rect.
+                if( nrects > HaarFeature::RECT_NUM )
+                    return false;
+
+                for( k = 0; k < nrects; k++ )
+                {
+                    FileNode rnode = rects_seq[k];
+                    f.rect[k].r.x = (int)rnode[0];
+                    f.rect[k].r.y = (int)rnode[1];
+                    f.rect[k].r.width = (int)rnode[2];
+                    f.rect[k].r.height = (int)rnode[3];
+                    f.rect[k].weight = (float)rnode[4];
+                }
+                features.push_back(f);
+                node.threshold = nnode[ICV_HAAR_THRESHOLD_NAME];
+
+                // A branch is either a leaf value (stored as minus its index
+                // in weak.leaves) or the index of a child node.
+                FileNode leftValNode = nnode[ICV_HAAR_LEFT_VAL_NAME];
+                if( !leftValNode.empty() )
+                {
+                    node.left = -(int)weak.leaves.size();
+                    weak.leaves.push_back((float)leftValNode);
+                }
+                else
+                {
+                    node.left = (int)nnode[ICV_HAAR_LEFT_NODE_NAME];
+                }
+                FileNode rightValNode = nnode[ICV_HAAR_RIGHT_VAL_NAME];
+                if( !rightValNode.empty() )
+                {
+                    node.right = -(int)weak.leaves.size();
+                    weak.leaves.push_back((float)rightValNode);
+                }
+                else
+                {
+                    node.right = (int)nnode[ICV_HAAR_RIGHT_NODE_NAME];
+                }
+                weak.nodes.push_back(node);
+            }
+        }
+    }
+
+    int maxWeakCount = 0, nfeatures = (int)features.size();
+    for( i = 0; i < nstages; i++ )
+        maxWeakCount = std::max(maxWeakCount, (int)stages[i].weaks.size());
+
+    // Pass 2: write the header. "height" / "width" must carry the matching
+    // dimension of the window size read above.
+    newfs << "cascade" << "{:opencv-cascade-classifier"
+    << "stageType" << "BOOST"
+    << "featureType" << "HAAR"
+    << "height" << cascadesize.height
+    << "width" << cascadesize.width
+    << "stageParams" << "{"
+        << "maxWeakCount" << (int)maxWeakCount
+    << "}"
+    << "featureParams" << "{"
+        << "maxCatCount" << 0
+    << "}"
+    << "stageNum" << (int)nstages
+    << "stages" << "[";
+
+    for( i = 0; i < nstages; i++ )
+    {
+        int nweaks = (int)stages[i].weaks.size();
+        newfs << "{" << "maxWeakCount" << (int)nweaks
+            << "stageThreshold" << stages[i].threshold
+            << "weakClassifiers" << "[";
+        for( j = 0; j < nweaks; j++ )
+        {
+            const HaarClassifier& c = stages[i].weaks[j];
+            newfs << "{" << "internalNodes" << "[:";
+            int nnodes = (int)c.nodes.size(), nleaves = (int)c.leaves.size();
+            // Each node is emitted as: left, right, feature index, threshold.
+            for( k = 0; k < nnodes; k++ )
+                newfs << c.nodes[k].left << c.nodes[k].right
+                    << c.nodes[k].f << c.nodes[k].threshold;
+            newfs << "]" << "leafValues" << "[:";
+            for( k = 0; k < nleaves; k++ )
+                newfs << c.leaves[k];
+            newfs << "]" << "}";
+        }
+        newfs << "]" << "}";
+    }
+
+    newfs << "]"
+        << "features" << "[";
+
+    for( i = 0; i < nfeatures; i++ )
+    {
+        const HaarFeature& f = features[i];
+        newfs << "{" << "rects" << "[";
+        for( j = 0; j < HaarFeature::RECT_NUM; j++ )
+        {
+            // The first two rectangles are always written; a third one only
+            // when its weight is not (near) zero.
+            if( j >= 2 && fabs(f.rect[j].weight) < FLT_EPSILON )
+                break;
+            newfs << "[:" << f.rect[j].r.x << f.rect[j].r.y <<
+                f.rect[j].r.width << f.rect[j].r.height << f.rect[j].weight << "]";
+        }
+        newfs << "]";
+        if( f.tilted )
+            newfs << "tilted" << 1;
+        newfs << "}";
+    }
+
+    newfs << "]" << "}";
+    return true;
+}
+
+}
+
+// Reads the old-format cascade stored in file `oldcascade` and writes its
+// converted form to file `newcascade`.
+// Returns false when either file cannot be opened or when the conversion
+// itself fails; in the latter case the output file is deleted again.
+bool CascadeClassifier::convert(const String& oldcascade, const String& newcascade)
+{
+    // Open the input first so that an unreadable source never creates or
+    // truncates the destination file.
+    FileStorage oldfs(oldcascade, FileStorage::READ);
+    if( !oldfs.isOpened() )
+        return false;
+
+    FileStorage newfs(newcascade, FileStorage::WRITE);
+    if( !newfs.isOpened() )
+        return false;
+
+    FileNode oldroot = oldfs.getFirstTopLevelNode();
+
+    bool ok = haar_cvt::convert(oldroot, newfs);
+    if( !ok && newcascade.size() > 0 )
+    {
+        // Close the output before deleting it: removing a file that is still
+        // open fails on some platforms.
+        newfs.release();
+        remove(newcascade.c_str());
+    }
+    return ok;
+}
+
+}
--- a/Lib/opencv/sources/modules/objdetect/src/detection_based_tracker.cpp
+++ b/Lib/opencv/sources/modules/objdetect/src/detection_based_tracker.cpp
@@ -0,0 +1,885 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#include "opencv2/core/utility.hpp"
+
+#include <thread>
+#include <mutex>
+#include <condition_variable>
+
+// Logging configuration.
+// DEBUGLOGS is forced to 1 when DEBUG or _DEBUG is defined and defaults to 0
+// when nothing else defined it.
+#if defined(DEBUG) || defined(_DEBUG)
+#undef DEBUGLOGS
+#define DEBUGLOGS 1
+#endif
+
+#ifndef DEBUGLOGS
+#define DEBUGLOGS 0
+#endif
+
+// LOGx0 macros always emit output: through the Android log on __ANDROID__,
+// otherwise through printf followed by a newline and a flush of stdout.
+#ifdef __ANDROID__
+#include <android/log.h>
+#define LOG_TAG "OBJECT_DETECTOR"
+#define LOGD0(...) ((void)__android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__))
+#define LOGI0(...) ((void)__android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__))
+#define LOGW0(...) ((void)__android_log_print(ANDROID_LOG_WARN, LOG_TAG, __VA_ARGS__))
+#define LOGE0(...) ((void)__android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__))
+#else
+
+#include <stdio.h>
+
+#define LOGD0(_str, ...) (printf(_str , ## __VA_ARGS__), printf("\n"), fflush(stdout))
+#define LOGI0(_str, ...) (printf(_str , ## __VA_ARGS__), printf("\n"), fflush(stdout))
+#define LOGW0(_str, ...) (printf(_str , ## __VA_ARGS__), printf("\n"), fflush(stdout))
+#define LOGE0(_str, ...) (printf(_str , ## __VA_ARGS__), printf("\n"), fflush(stdout))
+#endif //__ANDROID__
+
+// LOGx macros forward to LOGx0 only when DEBUGLOGS is non-zero; otherwise they
+// expand to nothing, so their arguments are not evaluated at all.
+#if DEBUGLOGS
+#define LOGD(_str, ...) LOGD0(_str , ## __VA_ARGS__)
+#define LOGI(_str, ...) LOGI0(_str , ## __VA_ARGS__)
+#define LOGW(_str, ...) LOGW0(_str , ## __VA_ARGS__)
+#define LOGE(_str, ...) LOGE0(_str , ## __VA_ARGS__)
+#else
+#define LOGD(...)
+#define LOGI(...)
+#define LOGW(...)
+#define LOGE(...)
+#endif //DEBUGLOGS
+
+
+using namespace cv;
+
+// Returns the center point of rectangle r in floating-point coordinates.
+static inline cv::Point2f centerRect(const cv::Rect& r)
+{
+    const float centerX = r.x+((float)r.width)/2;
+    const float centerY = r.y+((float)r.height)/2;
+    return cv::Point2f(centerX, centerY);
+}
+
+// Returns a rectangle with the same center as r whose width and height are
+// multiplied by `scale`; all resulting coordinates are rounded with cvRound.
+static inline cv::Rect scale_rect(const cv::Rect& r, float scale)
+{
+    const cv::Point2f middle = centerRect(r);
+    const float scaledWidth  = r.width  * scale;
+    const float scaledHeight = r.height * scale;
+
+    const int left = cvRound(middle.x - scaledWidth/2);
+    const int top  = cvRound(middle.y - scaledHeight/2);
+    return cv::Rect(left, top, cvRound(scaledWidth), cvRound(scaledHeight));
+}
+
+namespace cv
+{
+    // Entry point of the background detection thread. Declared here, before the
+    // class, so that SeparateDetectionWork can name it as a friend; `p` is the
+    // SeparateDetectionWork instance passed as an untyped pointer.
+    void* workcycleObjectDetectorFunction(void* p);
+}
+
+// Helper that runs a detector (cascadeInThread) in a separate std::thread and
+// exchanges images and detection results with the owning DetectionBasedTracker.
+// Shared state is guarded by `mtx`; two condition variables are used: one to
+// wake the worker (objectDetectorRun) and one for the worker to report that it
+// has started or stopped (objectDetectorThreadStartStop).
+// NOTE(review): the status flags are `volatile` and some reads happen without
+// holding `mtx`; volatile gives no inter-thread ordering guarantees in C++.
+class cv::DetectionBasedTracker::SeparateDetectionWork
+{
+    public:
+        SeparateDetectionWork(cv::DetectionBasedTracker& _detectionBasedTracker, cv::Ptr<DetectionBasedTracker::IDetector> _detector,
+                              const cv::DetectionBasedTracker::Parameters& params);
+        virtual ~SeparateDetectionWork();
+        // Fetches a ready detection result (if any) and possibly hands a new image to the worker.
+        bool communicateWithDetectingThread(const Mat& imageGray, std::vector<Rect>& rectsWhereRegions);
+        // Starts the worker thread; returns false if the state is not STATE_THREAD_STOPPED.
+        bool run();
+        // Asks the worker thread to finish and waits for its notification.
+        void stop();
+        // Drops stored detection results (and the result of a detection in progress).
+        void resetTracking();
+
+        // True while the worker is either waiting for an image or processing one.
+        // Reads stateThread without taking the mutex.
+        inline bool isWorking()
+        {
+            return (stateThread==STATE_THREAD_WORKING_SLEEPING) || (stateThread==STATE_THREAD_WORKING_WITH_IMAGE);
+        }
+        // Replaces the stored copy of the parameters under the mutex.
+        void setParameters(const cv::DetectionBasedTracker::Parameters& params)
+        {
+            std::unique_lock<std::mutex> mtx_lock(mtx);
+            parameters = params;
+        }
+
+        // Resets the state to "stopped" and notifies one waiter on
+        // objectDetectorThreadStartStop; called by the thread entry function
+        // after the work cycle has returned.
+        inline void init()
+        {
+            std::unique_lock<std::mutex> mtx_lock(mtx);
+            stateThread = STATE_THREAD_STOPPED;
+            isObjectDetectingReady = false;
+            shouldObjectDetectingResultsBeForgot = false;
+            objectDetectorThreadStartStop.notify_one();
+        }
+    protected:
+
+        DetectionBasedTracker& detectionBasedTracker;   // owning tracker
+        cv::Ptr<DetectionBasedTracker::IDetector> cascadeInThread; // detector run by the worker
+        std::thread second_workthread;                  // the worker thread itself
+        std::mutex mtx;                                 // guards the shared state below
+        std::condition_variable objectDetectorRun;      // signalled to wake the worker
+        std::condition_variable objectDetectorThreadStartStop; // signalled by the worker on start/stop
+        std::vector<cv::Rect> resultDetect;             // last detection result published by the worker
+        volatile bool isObjectDetectingReady;           // true when resultDetect holds an unread result
+        volatile bool shouldObjectDetectingResultsBeForgot; // set by resetTracking() to discard the running detection
+
+        // Life-cycle states of the worker thread.
+        // STATE_THREAD_WORKING is not assigned anywhere in the code shown here.
+        enum StateSeparatedThread {
+            STATE_THREAD_STOPPED=0,
+            STATE_THREAD_WORKING_SLEEPING,
+            STATE_THREAD_WORKING_WITH_IMAGE,
+            STATE_THREAD_WORKING,
+            STATE_THREAD_STOPPING
+        };
+        volatile StateSeparatedThread stateThread;
+
+        cv::Mat imageSeparateDetecting;                 // private copy of the image given to the worker
+
+        void workcycleObjectDetector();
+        friend void* workcycleObjectDetectorFunction(void* p);
+
+        long long  timeWhenDetectingThreadStartedWork;  // tick count of the last hand-over; -1 before the first one
+        cv::DetectionBasedTracker::Parameters parameters;
+};
+
+// Stores the owning tracker, the detector to run in the worker thread and a
+// copy of the parameters. The worker thread itself is not started here.
+// A null detector is rejected with CV_Assert.
+cv::DetectionBasedTracker::SeparateDetectionWork::SeparateDetectionWork(DetectionBasedTracker& _detectionBasedTracker, cv::Ptr<DetectionBasedTracker::IDetector> _detector,
+                                                                        const cv::DetectionBasedTracker::Parameters& params)
+    :detectionBasedTracker(_detectionBasedTracker),
+    cascadeInThread(_detector),
+    isObjectDetectingReady(false),
+    shouldObjectDetectingResultsBeForgot(false),
+    stateThread(STATE_THREAD_STOPPED),
+    timeWhenDetectingThreadStartedWork(-1),
+    parameters(params)
+{
+    CV_Assert(_detector);
+}
+
+// Joins the worker thread, if one was ever started, before the object goes away.
+// Destroying the object while the worker has not been stopped is reported as
+// an error in debug-log builds.
+cv::DetectionBasedTracker::SeparateDetectionWork::~SeparateDetectionWork()
+{
+    if(stateThread!=STATE_THREAD_STOPPED) {
+        LOGE("\n\n\nATTENTION!!! dangerous algorithm error: destructor DetectionBasedTracker::DetectionBasedTracker::~SeparateDetectionWork is called before stopping the workthread");
+    }
+    // std::thread::join() throws std::system_error when the thread is not
+    // joinable (run() was never called); an exception escaping a destructor
+    // terminates the process, so only join a thread that actually exists.
+    if (second_workthread.joinable()) {
+        second_workthread.join();
+    }
+}
+// Starts the worker thread and blocks until the worker signals that it is up.
+// Returns false (and does nothing) if the state is not STATE_THREAD_STOPPED,
+// true once the worker has been launched.
+bool cv::DetectionBasedTracker::SeparateDetectionWork::run()
+{
+    LOGD("DetectionBasedTracker::SeparateDetectionWork::run() --- start");
+    std::unique_lock<std::mutex> mtx_lock(mtx);
+    // unlocked when leaving scope
+    if (stateThread != STATE_THREAD_STOPPED) {
+        LOGE("DetectionBasedTracker::SeparateDetectionWork::run is called while the previous run is not stopped");
+        return false;
+    }
+    // A worker from a previous run()/stop() cycle has finished (init() reset the
+    // state under the mutex, which we now hold) but its std::thread object is
+    // still joinable. Move-assigning over a joinable std::thread calls
+    // std::terminate(), so reap the old thread first.
+    if (second_workthread.joinable()) {
+        second_workthread.join();
+    }
+    stateThread=STATE_THREAD_WORKING_SLEEPING;
+    second_workthread = std::thread(workcycleObjectDetectorFunction, (void*)this); //TODO: add attributes?
+    // The worker cannot take the mutex before this wait releases it, so its
+    // start-up notification is not missed.
+    objectDetectorThreadStartStop.wait(mtx_lock);
+    LOGD("DetectionBasedTracker::SeparateDetectionWork::run --- end");
+    return true;
+}
+
+// Runs _block inside a try and swallows every exception, logging it through
+// LOGE0 (which is active regardless of DEBUGLOGS). cv::Exception, std::exception
+// and unknown exceptions are reported with separate messages.
+#define CATCH_ALL_AND_LOG(_block)                                                           \
+    try {                                                                                   \
+        _block;                                                                             \
+    }                                                                                       \
+    catch(const cv::Exception& e) {                                                         \
+        LOGE0("\n %s: ERROR: OpenCV Exception caught: \n'%s'\n\n", CV_Func, e.what());      \
+    } catch(const std::exception& e) {                                                      \
+        LOGE0("\n %s: ERROR: Exception caught: \n'%s'\n\n", CV_Func, e.what());             \
+    } catch(...) {                                                                          \
+        LOGE0("\n %s: ERROR: UNKNOWN Exception caught\n\n", CV_Func);                       \
+    }
+
+// Thread entry point: runs the work cycle of the SeparateDetectionWork object
+// passed in `p` (exceptions are logged and swallowed), then resets the object
+// to the stopped state via init(). Always returns NULL.
+void* cv::workcycleObjectDetectorFunction(void* p)
+{
+    cv::DetectionBasedTracker::SeparateDetectionWork* const worker =
+        (cv::DetectionBasedTracker::SeparateDetectionWork*)p;
+
+    CATCH_ALL_AND_LOG({ worker->workcycleObjectDetector(); });
+
+    try{
+        worker->init();
+    } catch(...) {
+        LOGE0("DetectionBasedTracker: workcycleObjectDetectorFunction: ERROR concerning pointer, received as the function parameter");
+    }
+    return NULL;
+}
+
+// Body of the worker thread.
+// It first notifies run() that the thread has started, then repeatedly:
+//   1. waits on objectDetectorRun until it is signalled,
+//   2. runs cascadeInThread->detect() on imageSeparateDetecting,
+//   3. publishes the result in resultDetect (or discards it when
+//      shouldObjectDetectingResultsBeForgot was set meanwhile),
+// until isWorking() becomes false.
+// NOTE(review): the condition-variable waits have no predicate, so a spurious
+// wake-up is treated like a real signal; in that case the previously stored
+// image (if any) is processed again.
+void cv::DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector()
+{
+    static double freq = getTickFrequency();
+    LOGD("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector() --- start");
+    std::vector<Rect> objects;
+
+    CV_Assert(stateThread==STATE_THREAD_WORKING_SLEEPING);
+    // run() holds the mutex until it starts waiting, so this lock is acquired
+    // only once run() is ready to receive the start-up notification below.
+    std::unique_lock<std::mutex> mtx_lock(mtx);
+    {
+        objectDetectorThreadStartStop.notify_one();
+        LOGD("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector() --- before waiting");
+        CV_Assert(stateThread==STATE_THREAD_WORKING_SLEEPING);
+        // First wait for work (or for stop()); the mutex is released while waiting.
+        objectDetectorRun.wait(mtx_lock);
+        if (isWorking()) {
+            stateThread=STATE_THREAD_WORKING_WITH_IMAGE;
+        }
+        LOGD("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector() --- after waiting");
+    }
+    mtx_lock.unlock();
+
+    // The first wait already happened above, so the first loop iteration skips it.
+    bool isFirstStep=true;
+
+    isObjectDetectingReady=false;
+
+    while(isWorking())
+    {
+        LOGD("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector() --- next step");
+
+        if (! isFirstStep) {
+            LOGD("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector() --- before waiting");
+            // NOTE(review): this assert runs before the mutex is taken; if stop()
+            // changes the state between the loop condition and this line it
+            // throws (the thread entry function logs and swallows the exception).
+            CV_Assert(stateThread==STATE_THREAD_WORKING_SLEEPING);
+            mtx_lock.lock();
+            if (!isWorking()) {//it is a rare case, but may cause a crash
+                LOGD("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector() --- go out from the workcycle from inner part of lock just before waiting");
+                mtx_lock.unlock();
+                break;
+            }
+            CV_Assert(stateThread==STATE_THREAD_WORKING_SLEEPING);
+            objectDetectorRun.wait(mtx_lock);
+            if (isWorking()) {
+                stateThread=STATE_THREAD_WORKING_WITH_IMAGE;
+            }
+            mtx_lock.unlock();
+
+            LOGD("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector() --- after waiting");
+        } else {
+            isFirstStep=false;
+        }
+
+        // stop() may have been the one that woke us up.
+        if (!isWorking()) {
+            LOGD("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector() --- go out from the workcycle just after waiting");
+            break;
+        }
+
+
+        // Nothing to process yet: go back to the top of the loop.
+        // NOTE(review): stateThread is left as STATE_THREAD_WORKING_WITH_IMAGE on
+        // this path, so the CV_Assert before the next wait would fail — confirm
+        // whether this path is reachable in practice.
+        if (imageSeparateDetecting.empty()) {
+            LOGD("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector() --- imageSeparateDetecting is empty, continue");
+            continue;
+        }
+        LOGD("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector() --- start handling imageSeparateDetecting, img.size=%dx%d, img.data=0x%p",
+                imageSeparateDetecting.size().width, imageSeparateDetecting.size().height, (void*)imageSeparateDetecting.data);
+
+
+        int64 t1_detect=getTickCount();
+
+        // The detection itself runs without holding the mutex.
+        cascadeInThread->detect(imageSeparateDetecting, objects);
+
+        /*cascadeInThread.detectMultiScale( imageSeparateDetecting, objects,
+                detectionBasedTracker.parameters.scaleFactor, detectionBasedTracker.parameters.minNeighbors, 0
+                |CV_HAAR_SCALE_IMAGE
+                ,
+                min_objectSize,
+                max_objectSize
+                );
+        */
+
+        LOGD("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector() --- end handling imageSeparateDetecting");
+
+        // If stop() was requested during detection, drop the result and exit.
+        if (!isWorking()) {
+            LOGD("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector() --- go out from the workcycle just after detecting");
+            break;
+        }
+
+        // Timing is only used for the log line below.
+        int64 t2_detect = getTickCount();
+        int64 dt_detect = t2_detect-t1_detect;
+        double dt_detect_ms=((double)dt_detect)/freq * 1000.0;
+        (void)(dt_detect_ms);
+
+        LOGI("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector() --- objects num==%d, t_ms=%.4f", (int)objects.size(), dt_detect_ms);
+        // Publish (or discard) the result under the mutex and go back to sleeping state.
+        mtx_lock.lock();
+        if (!shouldObjectDetectingResultsBeForgot) {
+            resultDetect=objects;
+            isObjectDetectingReady=true;
+        } else { //shouldObjectDetectingResultsBeForgot==true
+            resultDetect.clear();
+            isObjectDetectingReady=false;
+            shouldObjectDetectingResultsBeForgot=false;
+        }
+        if(isWorking()) {
+            stateThread=STATE_THREAD_WORKING_SLEEPING;
+        }
+        mtx_lock.unlock();
+
+        objects.clear();
+    }// while(isWorking())
+
+    LOGI("DetectionBasedTracker::SeparateDetectionWork::workcycleObjectDetector: Returning");
+}
+
+// Requests the worker thread to finish and waits until it reports that it has
+// stopped (init() sends that notification after the work cycle has returned).
+// If the worker is not running, only the state is switched to
+// STATE_THREAD_STOPPING and the call returns immediately.
+// NOTE(review): in that not-running case the state stays STATE_THREAD_STOPPING,
+// which makes a later run() return false — confirm this is intended.
+// NOTE(review): the wait below has no predicate, so a spurious wake-up would
+// let stop() return before the worker has finished.
+void cv::DetectionBasedTracker::SeparateDetectionWork::stop()
+{
+    //FIXME: TODO: should add quickStop functionality
+    std::unique_lock<std::mutex> mtx_lock(mtx);
+    if (!isWorking()) {
+        // Change the shared state while the mutex is still held; writing it
+        // after unlocking raced with every other accessor of stateThread.
+        stateThread = STATE_THREAD_STOPPING;
+        mtx_lock.unlock();
+        LOGE("DetectionBasedTracker::SeparateDetectionWork::stop is called but the workthread is not active");
+        return;
+    }
+    stateThread=STATE_THREAD_STOPPING;
+    LOGD("DetectionBasedTracker::SeparateDetectionWork::stop: before going to sleep to wait for the signal from the workthread");
+    // Wake the worker in case it is sleeping, then wait for its stop notification.
+    objectDetectorRun.notify_one();
+    objectDetectorThreadStartStop.wait(mtx_lock);
+    LOGD("DetectionBasedTracker::SeparateDetectionWork::stop: after receiving the signal from the workthread, stateThread=%d", (int)stateThread);
+    mtx_lock.unlock();
+}
+
+// Discards any stored detection result. If the worker is processing an image
+// right now, it is additionally told to throw that result away when it finishes.
+void cv::DetectionBasedTracker::SeparateDetectionWork::resetTracking()
+{
+    LOGD("DetectionBasedTracker::SeparateDetectionWork::resetTracking");
+    std::lock_guard<std::mutex> guard(mtx);
+
+    const bool detectionInProgress = (stateThread == STATE_THREAD_WORKING_WITH_IMAGE);
+    if (!detectionInProgress) {
+        LOGD("DetectionBasedTracker::SeparateDetectionWork::resetTracking: since workthread is NOT detecting objects at the moment, we should NOT make any additional actions");
+    } else {
+        LOGD("DetectionBasedTracker::SeparateDetectionWork::resetTracking: since workthread is detecting objects at the moment, we should make cascadeInThread stop detecting and forget the detecting results");
+        shouldObjectDetectingResultsBeForgot=true;
+        //cascadeInThread.setStopFlag();//FIXME: TODO: this feature also should be contributed to OpenCV
+    }
+
+    isObjectDetectingReady=false;
+    resultDetect.clear();
+    // the guard releases the mutex on return
+}
+
+// Exchanges data with the worker thread; called from the tracker's process().
+//
+// If the worker is in the sleeping state:
+//   - a finished detection result, when available, is copied to rectsWhereRegions;
+//   - unless less than minDetectionPeriod ms have passed since the previous
+//     hand-over, imageGray is copied into imageSeparateDetecting and the worker
+//     is woken up.
+//
+// @param imageGray          current frame; copied, not referenced, for the worker.
+// @param rectsWhereRegions  receives the detection result when one is ready.
+// @return true if rectsWhereRegions was filled from a detection result;
+//         false otherwise (including when the worker is not sleeping).
+bool cv::DetectionBasedTracker::SeparateDetectionWork::communicateWithDetectingThread(const Mat& imageGray, std::vector<Rect>& rectsWhereRegions)
+{
+    static double freq = getTickFrequency();
+
+    // Checked without the mutex: if the worker is busy (or stopped) do nothing at all.
+    bool shouldCommunicateWithDetectingThread = (stateThread==STATE_THREAD_WORKING_SLEEPING);
+    LOGD("DetectionBasedTracker::SeparateDetectionWork::communicateWithDetectingThread: shouldCommunicateWithDetectingThread=%d", (shouldCommunicateWithDetectingThread?1:0));
+
+    if (!shouldCommunicateWithDetectingThread) {
+        return false;
+    }
+
+    bool shouldHandleResult = false;
+
+    std::unique_lock<std::mutex> mtx_lock(mtx);
+
+    // Take over a finished result, if the worker has published one.
+    if (isObjectDetectingReady) {
+        shouldHandleResult=true;
+        rectsWhereRegions = resultDetect;
+        isObjectDetectingReady=false;
+
+        double lastBigDetectionDuration = 1000.0 * (((double)(getTickCount()  - timeWhenDetectingThreadStartedWork )) / freq);
+        (void)(lastBigDetectionDuration);
+        LOGD("DetectionBasedTracker::SeparateDetectionWork::communicateWithDetectingThread: lastBigDetectionDuration=%f ms", (double)lastBigDetectionDuration);
+    }
+
+    // Throttle: only start a new detection when at least minDetectionPeriod ms
+    // have elapsed since the previous one was started. The period is read from
+    // the owning tracker's parameters, not from this object's own copy.
+    bool shouldSendNewDataToWorkThread = true;
+    if (timeWhenDetectingThreadStartedWork > 0) {
+        double time_from_previous_launch_in_ms=1000.0 * (((double)(getTickCount()  - timeWhenDetectingThreadStartedWork )) / freq); //the same formula as for lastBigDetectionDuration
+        shouldSendNewDataToWorkThread = (time_from_previous_launch_in_ms >= detectionBasedTracker.parameters.minDetectionPeriod);
+        LOGD("DetectionBasedTracker::SeparateDetectionWork::communicateWithDetectingThread: shouldSendNewDataToWorkThread was 1, now it is %d, since time_from_previous_launch_in_ms=%.2f, minDetectionPeriod=%d",
+                (shouldSendNewDataToWorkThread?1:0), time_from_previous_launch_in_ms, detectionBasedTracker.parameters.minDetectionPeriod);
+    }
+
+    if (shouldSendNewDataToWorkThread) {
+
+        // Deep-copy the frame so the worker owns its own pixels.
+        imageSeparateDetecting.create(imageGray.size(), CV_8UC1);
+
+        imageGray.copyTo(imageSeparateDetecting);//may change imageSeparateDetecting ptr. But should not.
+
+
+        timeWhenDetectingThreadStartedWork = getTickCount() ;
+
+        // Wake the worker; it proceeds once this function releases the mutex.
+        objectDetectorRun.notify_one();
+    }
+
+    mtx_lock.unlock();
+    LOGD("DetectionBasedTracker::SeparateDetectionWork::communicateWithDetectingThread: result: shouldHandleResult=%d", (shouldHandleResult?1:0));
+
+    return shouldHandleResult;
+}
+
+// Default tracker parameters: maxTrackLifetime = 5, minDetectionPeriod = 0.
+cv::DetectionBasedTracker::Parameters::Parameters()
+    : maxTrackLifetime(5),
+      minDetectionPeriod(0)
+{
+}
+
+// Default values of the tracker's internal tuning parameters.
+cv::DetectionBasedTracker::InnerParameters::InnerParameters()
+    : numLastPositionsToTrack(4),
+      numStepsToWaitBeforeFirstShow(6),
+      numStepsToTrackWithoutDetectingIfObjectHasNotBeenShown(3),
+      numStepsToShowWithoutDetecting(3),
+      coeffTrackingWindowSize(2.0),
+      coeffObjectSizeToTrack(0.85f),
+      coeffObjectSpeedUsingInPrediction(0.8f)
+{
+}
+
+// Builds a tracker.
+//   mainDetector     - optional; when non-null a SeparateDetectionWork helper is
+//                      created to run it in a background thread.
+//   trackingDetector - required; used by detectInRegion().
+//   params           - must have maxTrackLifetime >= 0.
+// Invalid arguments are rejected with CV_Assert.
+cv::DetectionBasedTracker::DetectionBasedTracker(cv::Ptr<IDetector> mainDetector, cv::Ptr<IDetector> trackingDetector, const Parameters& params)
+    :separateDetectionWork(),
+    parameters(params),
+    innerParameters(),
+    numTrackedSteps(0),
+    cascadeForTracking(trackingDetector)
+{
+    // mainDetector is deliberately not part of the check: it may be null.
+    CV_Assert( (params.maxTrackLifetime >= 0) && trackingDetector );
+
+    if (mainDetector) {
+        separateDetectionWork.reset(new SeparateDetectionWork(*this, mainDetector, params));
+    }
+
+    // Smoothing weights, newest position first.
+    weightsPositionsSmoothing.push_back(1);
+
+    static const float sizeWeights[] = { 0.5f, 0.3f, 0.2f };
+    for (size_t idx = 0; idx < sizeof(sizeWeights)/sizeof(sizeWeights[0]); ++idx) {
+        weightsSizesSmoothing.push_back(sizeWeights[idx]);
+    }
+}
+
+// Nothing to do explicitly; members are released by their own destructors.
+// NOTE(review): stop() is not called here, so a still-running worker thread is
+// not asked to finish before separateDetectionWork is destroyed — callers
+// presumably must call stop() first; confirm.
+cv::DetectionBasedTracker::~DetectionBasedTracker()
+{
+}
+
+// Processes one frame (must be CV_8UC1):
+//   1. starts the background detector if it exists and is not working;
+//   2. obtains search regions, either from a fresh background detection result
+//      or by extrapolating the last known positions of the tracked objects;
+//   3. runs the tracking detector inside every region;
+//   4. updates the list of tracked objects with what was found.
+void DetectionBasedTracker::process(const Mat& imageGray)
+{
+    CV_INSTRUMENT_REGION();
+
+    CV_Assert(imageGray.type()==CV_8UC1);
+
+    if ( separateDetectionWork && !separateDetectionWork->isWorking() ) {
+        separateDetectionWork->run();
+    }
+
+    static double freq = getTickFrequency();
+    static long long time_when_last_call_started=getTickCount();
+
+    {
+        // Only used for the debug log line.
+        const double elapsedMs = 1000.0 * (((double)(getTickCount()  - time_when_last_call_started)) / freq);
+        (void)(elapsedMs);
+        LOGD("DetectionBasedTracker::process: time from the previous call is %f ms", (double)elapsedMs);
+        time_when_last_call_started=getTickCount();
+    }
+
+    std::vector<Rect> rectsWhereRegions;
+    bool gotDetectorResult = false;
+    if (separateDetectionWork) {
+        gotDetectorResult = separateDetectionWork->communicateWithDetectingThread(imageGray, rectsWhereRegions);
+    }
+
+    if (!gotDetectorResult) {
+        LOGD("DetectionBasedTracker::process: get _rectsWhereRegions from previous positions");
+        for (std::vector<TrackedObject>::const_iterator obj = trackedObjects.begin(); obj != trackedObjects.end(); ++obj) {
+            const size_t count = obj->lastPositions.size();
+            CV_Assert(count > 0);
+
+            Rect predicted = obj->lastPositions[count-1];
+            if (predicted.empty()) {
+                LOGE("DetectionBasedTracker::process: ERROR: ATTENTION: strange algorithm's behavior: trackedObjects[i].rect() is empty");
+                continue;
+            }
+
+            // Shift the rectangle by a fraction of its last displacement.
+            if (count > 1) {
+                const Point2f now = centerRect(predicted);
+                const Point2f before = centerRect(obj->lastPositions[count-2]);
+                const Point2f shift = (now - before) * innerParameters.coeffObjectSpeedUsingInPrediction;
+
+                predicted.x += cvRound(shift.x);
+                predicted.y += cvRound(shift.y);
+            }
+
+            rectsWhereRegions.push_back(predicted);
+        }
+    } else {
+        LOGD("DetectionBasedTracker::process: get _rectsWhereRegions were got from resultDetect");
+    }
+    LOGI("DetectionBasedTracker::process: tracked objects num==%d", (int)trackedObjects.size());
+
+    std::vector<Rect> detectedObjectsInRegions;
+
+    LOGD("DetectionBasedTracker::process: rectsWhereRegions.size()=%d", (int)rectsWhereRegions.size());
+    for (std::vector<Rect>::const_iterator region = rectsWhereRegions.begin(); region != rectsWhereRegions.end(); ++region) {
+        detectInRegion(imageGray, *region, detectedObjectsInRegions);
+    }
+    LOGD("DetectionBasedTracker::process: detectedObjectsInRegions.size()=%d", (int)detectedObjectsInRegions.size());
+
+    updateTrackedObjects(detectedObjectsInRegions);
+}
+
+// Fills `result` with the display rectangles of all tracked objects that
+// currently have a non-empty position to show. `result` is cleared first.
+void cv::DetectionBasedTracker::getObjects(std::vector<cv::Rect>& result) const
+{
+    result.clear();
+
+    const int total = (int)trackedObjects.size();
+    for (int idx = 0; idx < total; ++idx) {
+        const Rect shown = calcTrackedObjectPositionToShow(idx);
+        if (!shown.empty()) {
+            result.push_back(shown);
+            LOGD("DetectionBasedTracker::process: found a object with SIZE %d x %d, rect={%d, %d, %d x %d}", shown.width, shown.height, shown.x, shown.y, shown.width, shown.height);
+        }
+    }
+}
+
+// Same as the Rect overload, but every entry also carries the id of the
+// tracked object it belongs to. `result` is cleared first.
+void cv::DetectionBasedTracker::getObjects(std::vector<Object>& result) const
+{
+    result.clear();
+
+    const int total = (int)trackedObjects.size();
+    for (int idx = 0; idx < total; ++idx) {
+        const Rect shown = calcTrackedObjectPositionToShow(idx);
+        if (!shown.empty()) {
+            result.push_back(Object(shown, trackedObjects[idx].id));
+            LOGD("DetectionBasedTracker::process: found a object with SIZE %d x %d, rect={%d, %d, %d x %d}", shown.width, shown.height, shown.x, shown.y, shown.width, shown.height);
+        }
+    }
+}
+// Reports every tracked object, including those without a position to show:
+// each entry holds the object's id, its (possibly empty) display rectangle and
+// its status. `result` is cleared first.
+void cv::DetectionBasedTracker::getObjects(std::vector<ExtObject>& result) const
+{
+    result.clear();
+
+    const int total = (int)trackedObjects.size();
+    for (int idx = 0; idx < total; ++idx) {
+        ObjectStatus objStatus;
+        const Rect shown = calcTrackedObjectPositionToShow(idx, objStatus);
+        result.push_back(ExtObject(trackedObjects[idx].id, shown, objStatus));
+        LOGD("DetectionBasedTracker::process: found a object with SIZE %d x %d, rect={%d, %d, %d x %d}, status = %d", shown.width, shown.height, shown.x, shown.y, shown.width, shown.height, (int)objStatus);
+    }
+}
+
+// Starts the background detection thread.
+// Returns false when there is no background detector, otherwise the result of
+// SeparateDetectionWork::run().
+bool cv::DetectionBasedTracker::run()
+{
+    if (!separateDetectionWork) {
+        return false;
+    }
+    return separateDetectionWork->run();
+}
+
+// Stops the background detection thread; a no-op without a background detector.
+void cv::DetectionBasedTracker::stop()
+{
+    if (!separateDetectionWork) {
+        return;
+    }
+    separateDetectionWork->stop();
+}
+
+// Forgets all tracked objects; when a background detector exists, its pending
+// results are dropped first.
+void cv::DetectionBasedTracker::resetTracking()
+{
+    const bool hasBackgroundDetector = separateDetectionWork ? true : false;
+    if (hasBackgroundDetector) {
+        separateDetectionWork->resetTracking();
+    }
+
+    trackedObjects.clear();
+}
+
+// Merges the rectangles detected in the current frame into trackedObjects.
+//
+// Phase 1: every tracked object, in order, claims the still-unclaimed detection
+//          whose intersection with the object's last position has the largest
+//          area. Other detections that intersect the object's last position,
+//          or the claimed detection, are marked as "intersected" and ignored.
+// Phase 2: claimed detections are appended to their object's position history,
+//          unclaimed non-intersected detections become new tracked objects.
+// Phase 3: objects that were not detected for too long are removed.
+//
+// The matching is greedy and depends on the order of trackedObjects.
+void cv::DetectionBasedTracker::updateTrackedObjects(const std::vector<Rect>& detectedObjects)
+{
+    // Negative markers stored in `correspondence`; values >= 0 are indices into trackedObjects.
+    enum {
+        NEW_RECTANGLE=-1,
+        INTERSECTED_RECTANGLE=-2
+    };
+
+    int N1=(int)trackedObjects.size();
+    int N2=(int)detectedObjects.size();
+    LOGD("DetectionBasedTracker::updateTrackedObjects: N1=%d, N2=%d", N1, N2);
+
+    // This counter is incremented on every call for every tracked object,
+    // whether or not the object is matched below.
+    for(int i=0; i < N1; i++) {
+        trackedObjects[i].numDetectedFrames++;
+    }
+
+    // correspondence[j] describes detection j. The clear()/resize() pair repeats
+    // what the constructor already did.
+    std::vector<int> correspondence(detectedObjects.size(), NEW_RECTANGLE);
+    correspondence.clear();
+    correspondence.resize(detectedObjects.size(), NEW_RECTANGLE);
+
+    // Phase 1: match tracked objects to detections.
+    for(int i=0; i < N1; i++) {
+        LOGD("DetectionBasedTracker::updateTrackedObjects: i=%d", i);
+        TrackedObject& curObject=trackedObjects[i];
+
+        int bestIndex=-1;
+        int bestArea=-1;
+
+        int numpositions=(int)curObject.lastPositions.size();
+        CV_Assert(numpositions > 0);
+        Rect prevRect=curObject.lastPositions[numpositions-1];
+        LOGD("DetectionBasedTracker::updateTrackedObjects: prevRect[%d]={%d, %d, %d x %d}", i, prevRect.x, prevRect.y, prevRect.width, prevRect.height);
+
+        for(int j=0; j < N2; j++) {
+            LOGD("DetectionBasedTracker::updateTrackedObjects: j=%d", j);
+            // Already claimed by an earlier tracked object.
+            if (correspondence[j] >= 0) {
+                LOGD("DetectionBasedTracker::updateTrackedObjects: j=%d is rejected, because it has correspondence=%d", j, correspondence[j]);
+                continue;
+            }
+            // Already marked as intersected (by this or an earlier object).
+            if (correspondence[j] !=NEW_RECTANGLE) {
+                LOGD("DetectionBasedTracker::updateTrackedObjects: j=%d is rejected, because it is intersected with another rectangle", j);
+                continue;
+            }
+            LOGD("DetectionBasedTracker::updateTrackedObjects: detectedObjects[%d]={%d, %d, %d x %d}",
+                    j, detectedObjects[j].x, detectedObjects[j].y, detectedObjects[j].width, detectedObjects[j].height);
+
+            Rect r=prevRect & detectedObjects[j];
+            if ( (r.width > 0) && (r.height > 0) ) {
+                LOGD("DetectionBasedTracker::updateTrackedObjects: There is intersection between prevRect and detectedRect, r={%d, %d, %d x %d}",
+                        r.x, r.y, r.width, r.height);
+                // Mark as intersected now; the best candidate is re-labelled with `i` below.
+                correspondence[j]=INTERSECTED_RECTANGLE;
+
+                if ( r.area() > bestArea) {
+                    LOGD("DetectionBasedTracker::updateTrackedObjects: The area of intersection is %d, it is better than bestArea=%d", r.area(), bestArea);
+                    bestIndex=j;
+                    bestArea=r.area();
+                }
+            }
+        }
+        if (bestIndex >= 0) {
+            LOGD("DetectionBasedTracker::updateTrackedObjects: The best correspondence for i=%d is j=%d", i, bestIndex);
+            correspondence[bestIndex]=i;
+
+            // Suppress every unclaimed detection that overlaps the chosen one.
+            for(int j=0; j < N2; j++) {
+                if (correspondence[j] >= 0)
+                    continue;
+
+                Rect r=detectedObjects[j] & detectedObjects[bestIndex];
+                if ( (r.width > 0) && (r.height > 0) ) {
+                    LOGD("DetectionBasedTracker::updateTrackedObjects: Found intersection between "
+                            "rectangles j=%d and bestIndex=%d, rectangle j=%d is marked as intersected", j, bestIndex, j);
+                    correspondence[j]=INTERSECTED_RECTANGLE;
+                }
+            }
+        } else {
+            LOGD("DetectionBasedTracker::updateTrackedObjects: There is no correspondence for i=%d ", i);
+            curObject.numFramesNotDetected++;
+        }
+    }
+
+    // Phase 2: apply the correspondence table.
+    LOGD("DetectionBasedTracker::updateTrackedObjects: start second cycle");
+    for(int j=0; j < N2; j++) {
+        LOGD("DetectionBasedTracker::updateTrackedObjects: j=%d", j);
+        int i=correspondence[j];
+        if (i >= 0) {//add position
+            LOGD("DetectionBasedTracker::updateTrackedObjects: add position");
+            trackedObjects[i].lastPositions.push_back(detectedObjects[j]);
+            // Keep at most numLastPositionsToTrack entries, dropping the oldest first.
+            while ((int)trackedObjects[i].lastPositions.size() > (int) innerParameters.numLastPositionsToTrack) {
+                trackedObjects[i].lastPositions.erase(trackedObjects[i].lastPositions.begin());
+            }
+            trackedObjects[i].numFramesNotDetected=0;
+        } else if (i==NEW_RECTANGLE){ //new object
+            LOGD("DetectionBasedTracker::updateTrackedObjects: new object");
+            // The Rect is converted to a TrackedObject by push_back.
+            trackedObjects.push_back(detectedObjects[j]);
+        } else {
+            LOGD("DetectionBasedTracker::updateTrackedObjects: was auxiliary intersection");
+        }
+    }
+
+    // Phase 3: drop objects that have been missing longer than maxTrackLifetime,
+    // or that have not been shown yet and have been missing longer than
+    // numStepsToTrackWithoutDetectingIfObjectHasNotBeenShown.
+    std::vector<TrackedObject>::iterator it=trackedObjects.begin();
+    while( it != trackedObjects.end() ) {
+        if ( (it->numFramesNotDetected > parameters.maxTrackLifetime)
+                ||
+                (
+                 (it->numDetectedFrames <= innerParameters.numStepsToWaitBeforeFirstShow)
+                 &&
+                 (it->numFramesNotDetected > innerParameters.numStepsToTrackWithoutDetectingIfObjectHasNotBeenShown)
+                )
+           )
+        {
+            int numpos=(int)it->lastPositions.size();
+            CV_Assert(numpos > 0);
+            Rect r = it->lastPositions[numpos-1];
+            (void)(r);
+            LOGD("DetectionBasedTracker::updateTrackedObjects: deleted object {%d, %d, %d x %d}",
+                    r.x, r.y, r.width, r.height);
+            it=trackedObjects.erase(it);
+        } else {
+            it++;
+        }
+    }
+}
+
+// Starts tracking a new object at `location` and returns the id assigned to it.
+int cv::DetectionBasedTracker::addObject(const Rect& location)
+{
+    LOGD("DetectionBasedTracker::addObject: new object {%d, %d %dx%d}",location.x, location.y, location.width, location.height);
+    trackedObjects.push_back(TrackedObject(location));
+
+    const TrackedObject& added = trackedObjects.back();
+    LOGD("DetectionBasedTracker::addObject: newId = %d", added.id);
+    return added.id;
+}
+
+// Convenience overload for callers that do not need the object status.
+Rect cv::DetectionBasedTracker::calcTrackedObjectPositionToShow(int i) const
+{
+    ObjectStatus ignoredStatus;
+    const Rect shown = calcTrackedObjectPositionToShow(i, ignoredStatus);
+    return shown;
+}
+// Computes the smoothed rectangle to display for tracked object `i`.
+//
+// The size is a weighted average of the most recent sizes (weightsSizesSmoothing)
+// and the center a weighted average of the most recent centers
+// (weightsPositionsSmoothing); in both cases weight 0 applies to the newest entry.
+//
+// @param i       index into trackedObjects.
+// @param status  receives WRONG_OBJECT (bad index or no positions),
+//                DETECTED_NOT_SHOWN_YET, DETECTED_TEMPORARY_LOST or DETECTED.
+// @return the rectangle when status is DETECTED, an empty Rect otherwise.
+Rect cv::DetectionBasedTracker::calcTrackedObjectPositionToShow(int i, ObjectStatus& status) const
+{
+    const bool indexInRange = (i >= 0) && (i < (int)trackedObjects.size());
+    if (!indexInRange) {
+        LOGE("DetectionBasedTracker::calcTrackedObjectPositionToShow: ERROR: wrong i=%d", i);
+        status = WRONG_OBJECT;
+        return Rect();
+    }
+
+    const TrackedObject& tracked = trackedObjects[i];
+
+    // Not yet seen often enough to be displayed.
+    if (tracked.numDetectedFrames <= innerParameters.numStepsToWaitBeforeFirstShow){
+        LOGI("DetectionBasedTracker::calcTrackedObjectPositionToShow: trackedObjects[%d].numDetectedFrames=%d <= numStepsToWaitBeforeFirstShow=%d --- return empty Rect()",
+                i, tracked.numDetectedFrames, innerParameters.numStepsToWaitBeforeFirstShow);
+        status = DETECTED_NOT_SHOWN_YET;
+        return Rect();
+    }
+    // Missing for too many frames to keep displaying it.
+    if (tracked.numFramesNotDetected > innerParameters.numStepsToShowWithoutDetecting) {
+        status = DETECTED_TEMPORARY_LOST;
+        return Rect();
+    }
+
+    const TrackedObject::PositionsVector& lastPositions=tracked.lastPositions;
+
+    const int N=(int)lastPositions.size();
+    if (N<=0) {
+        LOGE("DetectionBasedTracker::calcTrackedObjectPositionToShow: ERROR: no positions for i=%d", i);
+        status = WRONG_OBJECT;
+        return Rect();
+    }
+
+    const int Nsize=std::min(N, (int)weightsSizesSmoothing.size());
+    const int Ncenter= std::min(N, (int)weightsPositionsSmoothing.size());
+
+    // --- smoothed size ---
+    double w=0, h=0;
+    if (Nsize <= 0) {
+        w=lastPositions[N-1].width;
+        h=lastPositions[N-1].height;
+    } else {
+        double sum=0;
+        for(int j=0, k=N-1; j < Nsize; ++j, --k) {
+            w += lastPositions[k].width  * weightsSizesSmoothing[j];
+            h += lastPositions[k].height * weightsSizesSmoothing[j];
+            sum+=weightsSizesSmoothing[j];
+        }
+        w /= sum;
+        h /= sum;
+    }
+
+    // --- smoothed center ---
+    Point2f center;
+    if (Ncenter <= 0) {
+        const Rect& newest = lastPositions[N-1];
+        Point2f c1 = newest.tl();
+        c1=c1* 0.5f;
+        Point2f c2 = newest.br();
+        c2=c2*0.5f;
+
+        center=c1+c2;
+    } else {
+        double sum=0;
+        for(int j=0, k=N-1; j < Ncenter; ++j, --k) {
+            const Rect& pos = lastPositions[k];
+            // Center of `pos`, computed as half of tl plus half of br.
+            Point2f c1 = pos.tl();
+            c1=c1* 0.5f;
+            Point2f c2 = pos.br();
+            c2=c2*0.5f;
+            c1=c1+c2;
+
+            center=center+  (c1  * weightsPositionsSmoothing[j]);
+            sum+=weightsPositionsSmoothing[j];
+        }
+        center *= (float)(1 / sum);
+    }
+
+    const Point2f tl=center-Point2f((float)w*0.5f,(float)h*0.5f);
+    const Rect res(cvRound(tl.x), cvRound(tl.y), cvRound(w), cvRound(h));
+    LOGD("DetectionBasedTracker::calcTrackedObjectPositionToShow: Result for i=%d: {%d, %d, %d x %d}", i, res.x, res.y, res.width, res.height);
+
+    status = DETECTED;
+    return res;
+}
+
+// Runs the tracking detector inside an enlarged window around `r` and appends
+// the hits, translated back to full-image coordinates, to
+// detectedObjectsInRegions.
+//
+// The window is `r` scaled by coeffTrackingWindowSize and clipped to the image;
+// the minimum object size is the smaller side of `r` times coeffObjectSizeToTrack.
+// Nothing is appended when the clipped window is empty.
+void cv::DetectionBasedTracker::detectInRegion(const Mat& img, const Rect& r, std::vector<Rect>& detectedObjectsInRegions)
+{
+    const Rect imageBounds(Point(), img.size());
+    const Rect window = scale_rect(r, innerParameters.coeffTrackingWindowSize) & imageBounds;
+
+    if ( (window.width <=0) || (window.height <= 0) ) {
+        LOGD("DetectionBasedTracker::detectInRegion: Empty intersection");
+        return;
+    }
+
+    const int d = cvRound(std::min(r.width, r.height) * innerParameters.coeffObjectSizeToTrack);
+
+    Mat img1(img, window);//subimage for rectangle -- without data copying
+    LOGD("DetectionBasedTracker::detectInRegion: img1.size()=%d x %d, d=%d",
+            img1.size().width, img1.size().height, d);
+
+    std::vector<Rect> hits;
+    cascadeForTracking->setMinObjectSize(Size(d, d));
+    cascadeForTracking->detect(img1, hits);
+            /*
+            detectMultiScale( img1, tmpobjects,
+            parameters.scaleFactor, parameters.minNeighbors, 0
+            |CV_HAAR_FIND_BIGGEST_OBJECT
+            |CV_HAAR_SCALE_IMAGE
+            ,
+            Size(d,d),
+            max_objectSize
+            );*/
+
+    // Hits are relative to the sub-image; shift them by the window origin.
+    for (std::vector<Rect>::const_iterator hit = hits.begin(); hit != hits.end(); ++hit) {
+        detectedObjectsInRegions.push_back(Rect(hit->tl() + window.tl(), hit->size()));
+    }
+}
+
+// Replaces the tracker parameters (and the copy held by the background
+// detector helper, if any).
+// Returns false and changes nothing when params.maxTrackLifetime is negative.
+bool cv::DetectionBasedTracker::setParameters(const Parameters& params)
+{
+    const bool lifetimeValid = !( params.maxTrackLifetime < 0 );
+    if (!lifetimeValid)
+    {
+        LOGE("DetectionBasedTracker::setParameters: ERROR: wrong parameters value");
+        return false;
+    }
+
+    if (separateDetectionWork) {
+        separateDetectionWork->setParameters(params);
+    }
+
+    parameters=params;
+    return true;
+}
+
+// Returns a reference to the tracker's current parameters.
+const cv::DetectionBasedTracker::Parameters& DetectionBasedTracker::getParameters() const
+{
+    return this->parameters;
+}
--- a/Lib/opencv/sources/modules/objdetect/src/hog.cpp
+++ b/Lib/opencv/sources/modules/objdetect/src/hog.cpp
--- a/Lib/opencv/sources/modules/objdetect/src/main.cpp
+++ b/Lib/opencv/sources/modules/objdetect/src/main.cpp
@@ -0,0 +1,52 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+//
+// Library initialization file
+//
+
+#include "precomp.hpp"
+
+IPP_INITIALIZER_AUTO
+
+/* End of file. */
--- a/Lib/opencv/sources/modules/objdetect/src/opencl/cascadedetect.cl
+++ b/Lib/opencv/sources/modules/objdetect/src/opencl/cascadedetect.cl
@@ -0,0 +1,661 @@
+///////////////////////////// OpenCL kernels for face detection //////////////////////////////
+////////////////////////////// see the opencv/doc/license.txt ///////////////////////////////
+
+//
+// the code has been derived from the OpenCL Haar cascade kernel by
+//
+//    Niko Li, newlife20080214@gmail.com
+//    Wang Weiyan, wangweiyanster@gmail.com
+//    Jia Haipeng, jiahaipeng95@gmail.com
+//    Nathan, liujun@multicorewareinc.com
+//    Peng Xiao, pengxiao@outlook.com
+//    Erping Pang, erping@multicorewareinc.com
+//
+
+#ifdef HAAR
+typedef struct __attribute__((aligned(4))) OptHaarFeature // one Haar feature as read by runHaarClassifier
+{
+    int4 ofs[3] __attribute__((aligned (4))); // per rectangle: four offsets combined as p[x] - p[y] - p[z] + p[w] on the sum table
+    float4 weight __attribute__((aligned (4))); // x, y, z: weights of rectangles 0..2; the kernels skip rectangle 2 based on weight.z; w is not read in this file
+}
+OptHaarFeature;
+#endif
+
+#ifdef LBP
+typedef struct __attribute__((aligned(4))) OptLBPFeature // one LBP feature as read by the LBP kernels
+{
+    int16 ofs __attribute__((aligned (4))); // sixteen sum-table offsets (s0..sf); the kernels form nine 4-corner cell sums from them
+}
+OptLBPFeature;
+#endif
+
+typedef struct __attribute__((aligned(4))) Stump // single-node weak classifier
+{
+    float4 st __attribute__((aligned (4))); // x: feature index stored as int bits (read with as_int); y: threshold (Haar kernel only); z / w: the two possible outputs
+}
+Stump;
+
+typedef struct __attribute__((aligned(4))) Node // internal node of a multi-node tree (NODE_COUNT != 1 path)
+{
+    int4 n __attribute__((aligned (4))); // x: feature index; y: threshold as float bits (read with as_float); z / w: next index when the test is true / false -- values <= 0 end the walk and select leaf -idx
+}
+Node;
+
+typedef struct __attribute__((aligned (4))) Stage // one cascade stage
+{
+    int first __attribute__((aligned (4))); // index of the stage's first tree; used to offset the stump/node/leaf/bitset arrays
+    int ntrees __attribute__((aligned (4))); // number of trees summed in this stage
+    float threshold __attribute__((aligned (4))); // a window is rejected when the stage sum is below this value
+}
+Stage;
+
+typedef struct __attribute__((aligned (4))) ScaleData // per-scale descriptor; field order appears to mirror the host FeatureEvaluator::ScaleData -- TODO confirm layouts match
+{
+    float scale __attribute__((aligned (4))); // not read by the kernels in this file
+    int szi_width __attribute__((aligned (4))); // layer width; window positions span max(szi_width - windowsize.x, 0)
+    int szi_height __attribute__((aligned (4))); // layer height; window positions span max(szi_height - windowsize.y, 0)
+    int layer_ofs __attribute__((aligned (4))); // offset, in ints, added to the sum pointer to reach this layer
+    int ystep __attribute__((aligned (4))); // window step; values other than 1 make the kernels skip positions
+}
+ScaleData;
+
+#ifndef SUM_BUF_SIZE // may be supplied as a build option
+#define SUM_BUF_SIZE 0 // 0: no local copy of the sum tile; kernels read the sum table through __global pointers
+#endif
+
+#ifndef NODE_COUNT // may be supplied as a build option
+#define NODE_COUNT 1 // 1: runHaarClassifier treats `nodes` as an array of Stump; otherwise trees of Node are walked
+#endif
+
+#ifdef HAAR
+// Haar cascade evaluation over every layer described by scaleData.
+//
+// Each work-group walks tiles of LOCAL_SIZE_X x LOCAL_SIZE_Y window positions.
+//   Phase 1: one work-item per window evaluates stages [0, SPLIT_STAGE).
+//   Phase 2: surviving windows are compacted into local memory and, for each
+//            stage in [SPLIT_STAGE, N_STAGES), the trees of the stage are
+//            split among the work-items assigned to each surviving window.
+//
+// Arguments:
+//   nscales, scaleData - number of layers and their descriptors
+//   sum                - sum table (ints); squared sums are read sqofs ints further
+//   _sumstep           - row stride of sum in bytes
+//   sumoffset          - not used by this kernel
+//   optfeatures, stages, nodes, leaves0 - cascade description; with
+//                        NODE_COUNT == 1 nodes is reinterpreted as Stump[]
+//   facepos            - output: facepos[0] is an atomic hit counter, followed
+//                        by (scaleIdx, x, y) triples; at most MAX_FACES triples
+//                        are written although the counter keeps increasing
+//   normrect           - (x, y, width, height) of the normalization rectangle
+//   sqofs              - offset in ints from a sum entry to its squared-sum entry
+//   windowsize         - detection window size
+//
+// The optional third rectangle of a feature is used whenever its weight is
+// non-zero (fabs(weight.z) > 0) in every evaluation path, so features with a
+// negative third weight are handled the same way in both phases.
+__kernel __attribute__((reqd_work_group_size(LOCAL_SIZE_X,LOCAL_SIZE_Y,1)))
+void runHaarClassifier(
+    int nscales, __global const ScaleData* scaleData,
+    __global const int* sum,
+    int _sumstep, int sumoffset,
+    __global const OptHaarFeature* optfeatures,
+    __global const Stage* stages,
+    __global const Node* nodes,
+    __global const float* leaves0,
+
+    volatile __global int* facepos,
+    int4 normrect, int sqofs, int2 windowsize)
+{
+    int lx = get_local_id(0);
+    int ly = get_local_id(1);
+    int groupIdx = get_group_id(0);
+    int i, ngroups = get_global_size(0)/LOCAL_SIZE_X;
+    int scaleIdx, tileIdx, stageIdx;
+    int sumstep = (int)(_sumstep/sizeof(int));
+    // Corner offsets of the normalization rectangle in the global sum table.
+    int4 nofs0 = (int4)(mad24(normrect.y, sumstep, normrect.x),
+                        mad24(normrect.y, sumstep, normrect.x + normrect.z),
+                        mad24(normrect.y + normrect.w, sumstep, normrect.x),
+                        mad24(normrect.y + normrect.w, sumstep, normrect.x + normrect.z));
+    int normarea = normrect.z * normrect.w;
+    float invarea = 1.f/normarea;
+    int lidx = ly*LOCAL_SIZE_X + lx;
+
+    // Same corner offsets, but for the local tile copy when one is used.
+    #if SUM_BUF_SIZE > 0
+    int4 nofs = (int4)(mad24(normrect.y, SUM_BUF_STEP, normrect.x),
+                       mad24(normrect.y, SUM_BUF_STEP, normrect.x + normrect.z),
+                       mad24(normrect.y + normrect.w, SUM_BUF_STEP, normrect.x),
+                       mad24(normrect.y + normrect.w, SUM_BUF_STEP, normrect.x + normrect.z));
+    #else
+    int4 nofs = nofs0;
+    #endif
+    #define LOCAL_SIZE (LOCAL_SIZE_X*LOCAL_SIZE_Y)
+    // Local storage layout (in ints):
+    //   [ibuf: SUM_BUF_SIZE][lcount: 1][lnf: LOCAL_SIZE floats]
+    //   [lpartsum: LOCAL_SIZE floats][lbuf: LOCAL_SIZE shorts]
+    __local int lstore[SUM_BUF_SIZE + LOCAL_SIZE*5/2+1];
+    #if SUM_BUF_SIZE > 0
+    __local int* ibuf = lstore;
+    __local int* lcount = ibuf + SUM_BUF_SIZE;
+    #else
+    __local int* lcount = lstore;
+    #endif
+    __local float* lnf = (__local float*)(lcount + 1);
+    __local float* lpartsum = lnf + LOCAL_SIZE;
+    __local short* lbuf = (__local short*)(lpartsum + LOCAL_SIZE);
+
+    for( scaleIdx = nscales-1; scaleIdx >= 0; scaleIdx-- )
+    {
+        __global const ScaleData* s = scaleData + scaleIdx;
+        int ystep = s->ystep;
+        int2 worksize = (int2)(max(s->szi_width - windowsize.x, 0), max(s->szi_height - windowsize.y, 0));
+        int2 ntiles = (int2)((worksize.x + LOCAL_SIZE_X-1)/LOCAL_SIZE_X,
+                             (worksize.y + LOCAL_SIZE_Y-1)/LOCAL_SIZE_Y);
+        int totalTiles = ntiles.x*ntiles.y;
+
+        for( tileIdx = groupIdx; tileIdx < totalTiles; tileIdx += ngroups )
+        {
+            int ix0 = (tileIdx % ntiles.x)*LOCAL_SIZE_X;
+            int iy0 = (tileIdx / ntiles.x)*LOCAL_SIZE_Y;
+            int ix = lx, iy = ly;
+            __global const int* psum0 = sum + mad24(iy0, sumstep, ix0) + s->layer_ofs;
+            __global const int* psum1 = psum0 + mad24(iy, sumstep, ix);
+
+            // The test depends only on the tile, so the whole work-group
+            // takes the same branch and the barriers below stay uniform.
+            if( ix0 >= worksize.x || iy0 >= worksize.y )
+                continue;
+            #if SUM_BUF_SIZE > 0
+            // Cooperative copy of the tile's sum data into local memory.
+            for( i = lidx*4; i < SUM_BUF_SIZE; i += LOCAL_SIZE_X*LOCAL_SIZE_Y*4 )
+            {
+                int dy = i/SUM_BUF_STEP, dx = i - dy*SUM_BUF_STEP;
+                vstore4(vload4(0, psum0 + mad24(dy, sumstep, dx)), 0, ibuf+i);
+            }
+            #endif
+
+            if( lidx == 0 )
+                lcount[0] = 0;
+            barrier(CLK_LOCAL_MEM_FENCE);
+
+            // ---- Phase 1: one work-item per window, stages [0, SPLIT_STAGE) ----
+            if( ix0 + ix < worksize.x && iy0 + iy < worksize.y )
+            {
+                #if NODE_COUNT==1
+                __global const Stump* stump = (__global const Stump*)nodes;
+                #else
+                __global const Node* node = nodes;
+                __global const float* leaves = leaves0;
+                #endif
+                #if SUM_BUF_SIZE > 0
+                __local const int* psum = ibuf + mad24(iy, SUM_BUF_STEP, ix);
+                #else
+                __global const int* psum = psum1;
+                #endif
+
+                // Normalization factor: normarea * sqrt(max(E[x^2] - E[x]^2, 0)),
+                // replaced by 1 when that value is not positive.
+                __global const int* psqsum = (__global const int*)(psum1 + sqofs);
+                float sval = (psum[nofs.x] - psum[nofs.y] - psum[nofs.z] + psum[nofs.w])*invarea;
+                float sqval = (psqsum[nofs0.x] - psqsum[nofs0.y] - psqsum[nofs0.z] + psqsum[nofs0.w])*invarea;
+                float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
+                nf = nf > 0 ? nf : 1.f;
+
+                for( stageIdx = 0; stageIdx < SPLIT_STAGE; stageIdx++ )
+                {
+                    int ntrees = stages[stageIdx].ntrees;
+                    float stageSum = 0.f;
+                    #if NODE_COUNT==1
+                    for( i = 0; i < ntrees; i++ )
+                    {
+                        float4 st = stump[i].st;
+                        __global const OptHaarFeature* f = optfeatures + as_int(st.x);
+                        float4 weight = f->weight;
+
+                        int4 ofs = f->ofs[0];
+                        sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
+                        ofs = f->ofs[1];
+                        sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
+                        // Third rectangle is optional; any non-zero weight enables it.
+                        if( fabs(weight.z) > 0 )
+                        {
+                            ofs = f->ofs[2];
+                            sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
+                        }
+
+                        stageSum += (sval < st.y*nf) ? st.z : st.w;
+                    }
+                    stump += ntrees;
+                    #else
+                    for( i = 0; i < ntrees; i++, node += NODE_COUNT, leaves += NODE_COUNT+1 )
+                    {
+                        // Walk the tree until a non-positive index selects a leaf.
+                        int idx = 0;
+                        do
+                        {
+                            int4 n = node[idx].n;
+                            __global const OptHaarFeature* f = optfeatures + n.x;
+                            float4 weight = f->weight;
+
+                            int4 ofs = f->ofs[0];
+
+                            sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
+                            ofs = f->ofs[1];
+                            sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
+                            if( fabs(weight.z) > 0 )
+                            {
+                                ofs = f->ofs[2];
+                                sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
+                            }
+
+                            idx = (sval < as_float(n.y)*nf) ? n.z : n.w;
+                        }
+                        while(idx > 0);
+                        stageSum += leaves[-idx];
+                    }
+                    #endif
+
+                    if( stageSum < stages[stageIdx].threshold )
+                        break;
+                }
+
+                // Survivors are queued for phase 2. With ystep != 1 only
+                // windows whose local x and y are both even are kept.
+                if( stageIdx == SPLIT_STAGE && (ystep == 1 || ((ix | iy) & 1) == 0) )
+                {
+                    int count = atomic_inc(lcount);
+                    lbuf[count] = (int)(ix | (iy << 8)); // packed as x | (y << 8)
+                    lnf[count] = nf;
+                }
+            }
+
+            // ---- Phase 2: whole work-group cooperates on the survivors ----
+            for( stageIdx = SPLIT_STAGE; stageIdx < N_STAGES; stageIdx++ )
+            {
+                // First barrier: all queue writes of the previous step are visible.
+                barrier(CLK_LOCAL_MEM_FENCE);
+                int nrects = lcount[0];
+
+                if( nrects == 0 )
+                    break;
+                // Second barrier: everyone has read the count before it is reset.
+                barrier(CLK_LOCAL_MEM_FENCE);
+                if( lidx == 0 )
+                    lcount[0] = 0;
+
+                {
+                    #if NODE_COUNT == 1
+                    __global const Stump* stump = (__global const Stump*)nodes + stages[stageIdx].first;
+                    #else
+                    __global const Node* node = nodes + stages[stageIdx].first*NODE_COUNT;
+                    __global const float* leaves = leaves0 + stages[stageIdx].first*(NODE_COUNT+1);
+                    #endif
+                    // nparts work-items share each surviving window; each
+                    // handles a contiguous slice of ntrees_p trees.
+                    int nparts = LOCAL_SIZE / nrects;
+                    int ntrees = stages[stageIdx].ntrees;
+                    int ntrees_p = (ntrees + nparts - 1)/nparts;
+                    int nr = lidx / nparts;
+                    int partidx = -1, idxval = 0;
+                    float partsum = 0.f, nf = 0.f;
+
+                    if( nr < nrects )
+                    {
+                        partidx = lidx % nparts;
+                        idxval = lbuf[nr];
+                        nf = lnf[nr];
+
+                        {
+                        int ntrees0 = ntrees_p*partidx;
+                        int ntrees1 = min(ntrees0 + ntrees_p, ntrees);
+                        int ix1 = idxval & 255, iy1 = idxval >> 8;
+                        #if SUM_BUF_SIZE > 0
+                        __local const int* psum = ibuf + mad24(iy1, SUM_BUF_STEP, ix1);
+                        #else
+                        __global const int* psum = psum0 + mad24(iy1, sumstep, ix1);
+                        #endif
+
+                        #if NODE_COUNT == 1
+                        for( i = ntrees0; i < ntrees1; i++ )
+                        {
+                            float4 st = stump[i].st;
+                            __global const OptHaarFeature* f = optfeatures + as_int(st.x);
+                            float4 weight = f->weight;
+
+                            int4 ofs = f->ofs[0];
+                            float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
+                            ofs = f->ofs[1];
+                            sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
+                            if( fabs(weight.z) > 0 )
+                            {
+                                ofs = f->ofs[2];
+                                sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
+                            }
+
+                            partsum += (sval < st.y*nf) ? st.z : st.w;
+                        }
+                        #else
+                        for( i = ntrees0; i < ntrees1; i++ )
+                        {
+                            int idx = 0;
+                            do
+                            {
+                                // Tree i starts NODE_COUNT nodes after tree i-1,
+                                // matching the stride used for the base pointer.
+                                int4 n = node[i*NODE_COUNT + idx].n;
+                                __global const OptHaarFeature* f = optfeatures + n.x;
+                                float4 weight = f->weight;
+                                int4 ofs = f->ofs[0];
+
+                                float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
+                                ofs = f->ofs[1];
+                                sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
+                                if( fabs(weight.z) > 0 )
+                                {
+                                    ofs = f->ofs[2];
+                                    sval = mad((float)(psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
+                                }
+
+                                idx = (sval < as_float(n.y)*nf) ? n.z : n.w;
+                            }
+                            while(idx > 0);
+                            // Each tree owns NODE_COUNT+1 leaves.
+                            partsum += leaves[i*(NODE_COUNT+1) - idx];
+                        }
+                        #endif
+                        }
+                    }
+                    lpartsum[lidx] = partsum;
+                    barrier(CLK_LOCAL_MEM_FENCE);
+
+                    // The first work-item of each window adds up the partial
+                    // sums and re-queues the window if it passes the stage.
+                    if( partidx == 0 )
+                    {
+                        float s = lpartsum[nr*nparts];
+                        for( i = 1; i < nparts; i++ )
+                            s += lpartsum[i + nr*nparts];
+                        if( s >= stages[stageIdx].threshold )
+                        {
+                            int count = atomic_inc(lcount);
+                            lbuf[count] = idxval;
+                            lnf[count] = nf;
+                        }
+                    }
+                }
+            }
+
+            barrier(CLK_LOCAL_MEM_FENCE);
+            // Only reached with stageIdx == N_STAGES when no stage emptied the queue.
+            if( stageIdx == N_STAGES )
+            {
+                int nrects = lcount[0];
+                if( lidx < nrects )
+                {
+                    int nfaces = atomic_inc(facepos);
+                    if( nfaces < MAX_FACES )
+                    {
+                        volatile __global int* face = facepos + 1 + nfaces*3;
+                        int val = lbuf[lidx];
+                        face[0] = scaleIdx;
+                        face[1] = ix0 + (val & 255);
+                        face[2] = iy0 + (val >> 8);
+                    }
+                }
+            }
+        }
+    }
+}
+#endif
+
+#ifdef LBP
+#undef CALC_SUM_OFS_ // drop any earlier definition; the macro below yields ptr[p0] - ptr[p1] - ptr[p2] + ptr[p3]
+#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
+    ((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
+
+// LBP stump cascade, straightforward variant: every work-item evaluates all
+// N_STAGES stages for its own window and uses no local memory or barriers.
+//
+// Work-groups are linearised over both grid dimensions and stride over the
+// tiles of each layer; window coordinates advance in steps of the layer's ystep.
+// A window that passes every stage is appended to facepos as
+// (scaleIdx, x, y); facepos[0] is the atomic hit counter and at most MAX_FACES
+// triples are stored. sumoffset is not used by this kernel.
+__kernel void runLBPClassifierStumpSimple(
+    int nscales, __global const ScaleData* scaleData,
+    __global const int* sum,
+    int _sumstep, int sumoffset,
+    __global const OptLBPFeature* optfeatures,
+    __global const Stage* stages,
+    __global const Stump* stumps,
+    __global const int* bitsets,
+    int bitsetSize,
+
+    volatile __global int* facepos,
+    int2 windowsize)
+{
+    int lx = get_local_id(0);
+    int ly = get_local_id(1);
+    int local_size_x = get_local_size(0);
+    int local_size_y = get_local_size(1);
+    int groupIdx = get_group_id(1)*get_num_groups(0) + get_group_id(0);
+    int ngroups = get_num_groups(0)*get_num_groups(1);
+    int scaleIdx, tileIdx, stageIdx;
+    int sumstep = (int)(_sumstep/sizeof(int));
+
+    for( scaleIdx = nscales-1; scaleIdx >= 0; scaleIdx-- )
+    {
+        __global const ScaleData* layer = scaleData + scaleIdx;
+        int ystep = layer->ystep;
+        int2 worksize = (int2)(max(layer->szi_width - windowsize.x, 0),
+                               max(layer->szi_height - windowsize.y, 0));
+        int2 ntiles = (int2)((worksize.x/ystep + local_size_x-1)/local_size_x,
+                             (worksize.y/ystep + local_size_y-1)/local_size_y);
+        int totalTiles = ntiles.x*ntiles.y;
+
+        for( tileIdx = groupIdx; tileIdx < totalTiles; tileIdx += ngroups )
+        {
+            int tileRow = tileIdx / ntiles.x;
+            int tileCol = tileIdx % ntiles.x;
+            int iy = mad24(tileRow, local_size_y, ly) * ystep;
+            int ix = mad24(tileCol, local_size_x, lx) * ystep;
+
+            // Positions outside the valid range have nothing to evaluate.
+            if( ix >= worksize.x || iy >= worksize.y )
+                continue;
+
+            __global const int* p = sum + mad24(iy, sumstep, ix) + layer->layer_ofs;
+            __global const Stump* stump = stumps;
+            __global const int* bitset = bitsets;
+            int passed = 1;
+
+            for( stageIdx = 0; stageIdx < N_STAGES; stageIdx++ )
+            {
+                int i, ntrees = stages[stageIdx].ntrees;
+                float stageSum = 0.f;
+
+                for( i = 0; i < ntrees; i++ )
+                {
+                    float4 st = stump->st;
+                    __global const OptLBPFeature* f = optfeatures + as_int(st.x);
+                    int16 ofs = f->ofs;
+
+                    // Centre cell of the 3x3 grid; each neighbour is compared against it.
+                    int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
+
+                    // Top row selects the bitset word ...
+                    int word = (CALC_SUM_OFS_( ofs.s0, ofs.s1, ofs.s4, ofs.s5, p ) >= cval ? 4 : 0); // cell 0
+                    word |= (CALC_SUM_OFS_( ofs.s1, ofs.s2, ofs.s5, ofs.s6, p ) >= cval ? 2 : 0);    // cell 1
+                    word |= (CALC_SUM_OFS_( ofs.s2, ofs.s3, ofs.s6, ofs.s7, p ) >= cval ? 1 : 0);    // cell 2
+
+                    // ... the remaining five neighbours select the bit inside it.
+                    int bit = (CALC_SUM_OFS_( ofs.s6, ofs.s7, ofs.sa, ofs.sb, p ) >= cval ? 16 : 0); // cell 5
+                    bit |= (CALC_SUM_OFS_( ofs.sa, ofs.sb, ofs.se, ofs.sf, p ) >= cval ? 8 : 0);     // cell 8
+                    bit |= (CALC_SUM_OFS_( ofs.s9, ofs.sa, ofs.sd, ofs.se, p ) >= cval ? 4 : 0);     // cell 7
+                    bit |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0);     // cell 6
+                    bit |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0);     // cell 3
+
+                    stageSum += (bitset[word] & (1 << bit)) ? st.z : st.w;
+
+                    // Stumps and bitsets are laid out consecutively across stages.
+                    stump++;
+                    bitset += bitsetSize;
+                }
+
+                if( stageSum < stages[stageIdx].threshold )
+                {
+                    passed = 0;
+                    break;
+                }
+            }
+
+            if( passed )
+            {
+                int nfaces = atomic_inc(facepos);
+                if( nfaces < MAX_FACES )
+                {
+                    volatile __global int* face = facepos + 1 + nfaces*3;
+                    face[0] = scaleIdx;
+                    face[1] = ix;
+                    face[2] = iy;
+                }
+            }
+        }
+    }
+}
+
+// LBP stump cascade, work-group cooperative variant.
+//
+// Each work-group walks tiles of LOCAL_SIZE_X x LOCAL_SIZE_Y window positions.
+//   Phase 1: one work-item per window evaluates stages [0, SPLIT_STAGE).
+//   Phase 2: surviving windows are compacted into local memory and, for each
+//            stage in [SPLIT_STAGE, N_STAGES), the stumps of the stage are
+//            split among the work-items assigned to each surviving window.
+//
+// Arguments:
+//   nscales, scaleData - number of layers and their descriptors
+//   sum, _sumstep      - sum table (ints) and its row stride in bytes
+//   sumoffset          - not used by this kernel
+//   optfeatures, stages, stumps, bitsets, bitsetSize - cascade description;
+//                        every stump owns bitsetSize ints of bitsets
+//   facepos            - output: facepos[0] is an atomic hit counter, followed
+//                        by (scaleIdx, x, y) triples; at most MAX_FACES triples
+//                        are written
+//   windowsize         - detection window size
+//
+// The survivor count is read only after a barrier and reset only after a
+// second one (the same sequence runHaarClassifier uses), so all work-items
+// see the same count and take the same break/barrier path.
+__kernel __attribute__((reqd_work_group_size(LOCAL_SIZE_X,LOCAL_SIZE_Y,1)))
+void runLBPClassifierStump(
+    int nscales, __global const ScaleData* scaleData,
+    __global const int* sum,
+    int _sumstep, int sumoffset,
+    __global const OptLBPFeature* optfeatures,
+    __global const Stage* stages,
+    __global const Stump* stumps,
+    __global const int* bitsets,
+    int bitsetSize,
+
+    volatile __global int* facepos,
+    int2 windowsize)
+{
+    int lx = get_local_id(0);
+    int ly = get_local_id(1);
+    int groupIdx = get_group_id(0);
+    int i, ngroups = get_global_size(0)/LOCAL_SIZE_X;
+    int scaleIdx, tileIdx, stageIdx;
+    int sumstep = (int)(_sumstep/sizeof(int));
+    int lidx = ly*LOCAL_SIZE_X + lx;
+
+    #define LOCAL_SIZE (LOCAL_SIZE_X*LOCAL_SIZE_Y)
+    // Local storage layout (in ints):
+    //   [ibuf: SUM_BUF_SIZE][lcount: 1][lpartsum: LOCAL_SIZE floats][lbuf: LOCAL_SIZE shorts]
+    __local int lstore[SUM_BUF_SIZE + LOCAL_SIZE*3/2+1];
+    #if SUM_BUF_SIZE > 0
+    __local int* ibuf = lstore;
+    __local int* lcount = ibuf + SUM_BUF_SIZE;
+    #else
+    __local int* lcount = lstore;
+    #endif
+    __local float* lpartsum = (__local float*)(lcount + 1);
+    __local short* lbuf = (__local short*)(lpartsum + LOCAL_SIZE);
+
+    for( scaleIdx = nscales-1; scaleIdx >= 0; scaleIdx-- )
+    {
+        __global const ScaleData* s = scaleData + scaleIdx;
+        int ystep = s->ystep;
+        int2 worksize = (int2)(max(s->szi_width - windowsize.x, 0), max(s->szi_height - windowsize.y, 0));
+        int2 ntiles = (int2)((worksize.x + LOCAL_SIZE_X-1)/LOCAL_SIZE_X,
+                             (worksize.y + LOCAL_SIZE_Y-1)/LOCAL_SIZE_Y);
+        int totalTiles = ntiles.x*ntiles.y;
+
+        for( tileIdx = groupIdx; tileIdx < totalTiles; tileIdx += ngroups )
+        {
+            int ix0 = (tileIdx % ntiles.x)*LOCAL_SIZE_X;
+            int iy0 = (tileIdx / ntiles.x)*LOCAL_SIZE_Y;
+            int ix = lx, iy = ly;
+            __global const int* psum0 = sum + mad24(iy0, sumstep, ix0) + s->layer_ofs;
+
+            // The test depends only on the tile, so the whole work-group
+            // takes the same branch and the barriers below stay uniform.
+            if( ix0 >= worksize.x || iy0 >= worksize.y )
+                continue;
+            #if SUM_BUF_SIZE > 0
+            // Cooperative copy of the tile's sum data into local memory.
+            for( i = lidx*4; i < SUM_BUF_SIZE; i += LOCAL_SIZE_X*LOCAL_SIZE_Y*4 )
+            {
+                int dy = i/SUM_BUF_STEP, dx = i - dy*SUM_BUF_STEP;
+                vstore4(vload4(0, psum0 + mad24(dy, sumstep, dx)), 0, ibuf+i);
+            }
+            barrier(CLK_LOCAL_MEM_FENCE);
+            #endif
+
+            if( lidx == 0 )
+                lcount[0] = 0;
+            barrier(CLK_LOCAL_MEM_FENCE);
+
+            // ---- Phase 1: one work-item per window, stages [0, SPLIT_STAGE) ----
+            if( ix0 + ix < worksize.x && iy0 + iy < worksize.y )
+            {
+                __global const Stump* stump = stumps;
+                __global const int* bitset = bitsets;
+                #if SUM_BUF_SIZE > 0
+                __local const int* p = ibuf + mad24(iy, SUM_BUF_STEP, ix);
+                #else
+                __global const int* p = psum0 + mad24(iy, sumstep, ix);
+                #endif
+
+                for( stageIdx = 0; stageIdx < SPLIT_STAGE; stageIdx++ )
+                {
+                    int ntrees = stages[stageIdx].ntrees;
+                    float stageSum = 0.f;
+                    for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize )
+                    {
+                        float4 st = stump->st;
+                        __global const OptLBPFeature* f = optfeatures + as_int(st.x);
+                        int16 ofs = f->ofs;
+
+                        // Centre cell of the 3x3 grid; neighbours are compared against it.
+                        int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
+
+                        int mask, idx = (CALC_SUM_OFS_( ofs.s0, ofs.s1, ofs.s4, ofs.s5, p ) >= cval ? 4 : 0); // cell 0
+                        idx |= (CALC_SUM_OFS_( ofs.s1, ofs.s2, ofs.s5, ofs.s6, p ) >= cval ? 2 : 0); // cell 1
+                        idx |= (CALC_SUM_OFS_( ofs.s2, ofs.s3, ofs.s6, ofs.s7, p ) >= cval ? 1 : 0); // cell 2
+
+                        mask = (CALC_SUM_OFS_( ofs.s6, ofs.s7, ofs.sa, ofs.sb, p ) >= cval ? 16 : 0); // cell 5
+                        mask |= (CALC_SUM_OFS_( ofs.sa, ofs.sb, ofs.se, ofs.sf, p ) >= cval ? 8 : 0);  // cell 8
+                        mask |= (CALC_SUM_OFS_( ofs.s9, ofs.sa, ofs.sd, ofs.se, p ) >= cval ? 4 : 0);  // cell 7
+                        mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0);  // cell 6
+                        mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0);  // cell 3
+
+                        // idx picks the bitset word, mask the bit inside it.
+                        stageSum += (bitset[idx] & (1 << mask)) ? st.z : st.w;
+                    }
+
+                    if( stageSum < stages[stageIdx].threshold )
+                        break;
+                }
+
+                // Survivors are queued for phase 2. With ystep != 1 only
+                // windows whose local x and y are both even are kept.
+                if( stageIdx == SPLIT_STAGE && (ystep == 1 || ((ix | iy) & 1) == 0) )
+                {
+                    int count = atomic_inc(lcount);
+                    lbuf[count] = (int)(ix | (iy << 8)); // packed as x | (y << 8)
+                }
+            }
+
+            // ---- Phase 2: whole work-group cooperates on the survivors ----
+            for( stageIdx = SPLIT_STAGE; stageIdx < N_STAGES; stageIdx++ )
+            {
+                // First barrier: every atomic_inc / lbuf write of the previous
+                // step is complete before the count is read, so all work-items
+                // read the same value and break together.
+                barrier(CLK_LOCAL_MEM_FENCE);
+                int nrects = lcount[0];
+
+                if( nrects == 0 )
+                    break;
+                // Second barrier: everyone has read the count before it is reset.
+                barrier(CLK_LOCAL_MEM_FENCE);
+                if( lidx == 0 )
+                    lcount[0] = 0;
+
+                {
+                    __global const Stump* stump = stumps + stages[stageIdx].first;
+                    __global const int* bitset = bitsets + stages[stageIdx].first*bitsetSize;
+                    // nparts work-items share each surviving window; each
+                    // handles a contiguous slice of ntrees_p stumps.
+                    int nparts = LOCAL_SIZE / nrects;
+                    int ntrees = stages[stageIdx].ntrees;
+                    int ntrees_p = (ntrees + nparts - 1)/nparts;
+                    int nr = lidx / nparts;
+                    int partidx = -1, idxval = 0;
+                    float partsum = 0.f;
+
+                    if( nr < nrects )
+                    {
+                        partidx = lidx % nparts;
+                        idxval = lbuf[nr];
+
+                        {
+                            int ntrees0 = ntrees_p*partidx;
+                            int ntrees1 = min(ntrees0 + ntrees_p, ntrees);
+                            int ix1 = idxval & 255, iy1 = idxval >> 8;
+                            #if SUM_BUF_SIZE > 0
+                            __local const int* p = ibuf + mad24(iy1, SUM_BUF_STEP, ix1);
+                            #else
+                            __global const int* p = psum0 + mad24(iy1, sumstep, ix1);
+                            #endif
+
+                            for( i = ntrees0; i < ntrees1; i++ )
+                            {
+                                float4 st = stump[i].st;
+                                __global const OptLBPFeature* f = optfeatures + as_int(st.x);
+                                int16 ofs = f->ofs;
+
+                                int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
+
+                                int mask, idx = (CALC_SUM_OFS_( ofs.s0, ofs.s1, ofs.s4, ofs.s5, p ) >= cval ? 4 : 0); // cell 0
+                                idx |= (CALC_SUM_OFS_( ofs.s1, ofs.s2, ofs.s5, ofs.s6, p ) >= cval ? 2 : 0); // cell 1
+                                idx |= (CALC_SUM_OFS_( ofs.s2, ofs.s3, ofs.s6, ofs.s7, p ) >= cval ? 1 : 0); // cell 2
+
+                                mask = (CALC_SUM_OFS_( ofs.s6, ofs.s7, ofs.sa, ofs.sb, p ) >= cval ? 16 : 0); // cell 5
+                                mask |= (CALC_SUM_OFS_( ofs.sa, ofs.sb, ofs.se, ofs.sf, p ) >= cval ? 8 : 0);  // cell 8
+                                mask |= (CALC_SUM_OFS_( ofs.s9, ofs.sa, ofs.sd, ofs.se, p ) >= cval ? 4 : 0);  // cell 7
+                                mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0);  // cell 6
+                                mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0);  // cell 3
+
+                                partsum += (bitset[i*bitsetSize + idx] & (1 << mask)) ? st.z : st.w;
+                            }
+                        }
+                    }
+                    lpartsum[lidx] = partsum;
+                    barrier(CLK_LOCAL_MEM_FENCE);
+
+                    // The first work-item of each window adds up the partial
+                    // sums and re-queues the window if it passes the stage.
+                    if( partidx == 0 )
+                    {
+                        float s = lpartsum[nr*nparts];
+                        for( i = 1; i < nparts; i++ )
+                            s += lpartsum[i + nr*nparts];
+                        if( s >= stages[stageIdx].threshold )
+                        {
+                            int count = atomic_inc(lcount);
+                            lbuf[count] = idxval;
+                        }
+                    }
+                }
+            }
+
+            barrier(CLK_LOCAL_MEM_FENCE);
+            // Only reached with stageIdx == N_STAGES when no stage emptied the queue.
+            if( stageIdx == N_STAGES )
+            {
+                int nrects = lcount[0];
+                if( lidx < nrects )
+                {
+                    int nfaces = atomic_inc(facepos);
+                    if( nfaces < MAX_FACES )
+                    {
+                        volatile __global int* face = facepos + 1 + nfaces*3;
+                        int val = lbuf[lidx];
+                        face[0] = scaleIdx;
+                        face[1] = ix0 + (val & 255);
+                        face[2] = iy0 + (val >> 8);
+                    }
+                }
+            }
+        }
+    }
+}
+#endif
--- a/Lib/opencv/sources/modules/objdetect/src/opencl/objdetect_hog.cl
+++ b/Lib/opencv/sources/modules/objdetect/src/opencl/objdetect_hog.cl
@@ -0,0 +1,722 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Wenju He, wenju@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+// Cell size macros.  Neither is referenced by any kernel in this file;
+// presumably they mirror the host-side HOG cell size -- TODO confirm.
+#define CELL_WIDTH 8
+#define CELL_HEIGHT 8
+// Number of cells per block along x and y.  compute_hists_lut_kernel uses them
+// to size and index its per-block region of local memory.
+#define CELLS_PER_BLOCK_X 2
+#define CELLS_PER_BLOCK_Y 2
+// Length of the __local scratch arrays declared by the classify and gradient
+// kernels, and the loop stride of the kernels that iterate with "i += NTHREADS".
+// NOTE(review): the host presumably launches those kernels with a matching
+// local size -- confirm against the caller.
+#define NTHREADS 256
+// Single-precision pi; the gradient kernels add it to the atan2() result.
+#define CV_PI_F M_PI_F
+
+// Element type of the quantized-angle (qangle) buffers: int when INTEL_DEVICE is
+// defined at build time, uchar otherwise.  QANGLE_TYPE2 is the matching 2-vector.
+#ifdef INTEL_DEVICE
+#define QANGLE_TYPE     int
+#define QANGLE_TYPE2    int2
+#else
+#define QANGLE_TYPE     uchar
+#define QANGLE_TYPE2    uchar2
+#endif
+
+//----------------------------------------------------------------------------
+// Histogram computation
+// 12 threads for a cell, 12x4 threads per block
+// Use pre-computed gaussian and interp_weight lookup tables
+//
+// Work-item layout, as derived from the index arithmetic below:
+//   * dimension 0 packs several blocks side by side, 24 work-items per block
+//     (2 cells x 12 work-items); lp selects the block inside the work-group;
+//   * dimension 1 selects the cell row inside the block (cell_y).
+// Each work-item walks 12 consecutive rows at one x position, reads two
+// (magnitude, bin) pairs per position and accumulates the weighted magnitudes
+// into its own column of the local histogram.  The 12 columns of every cell are
+// then summed and the result is copied to block_hists.
+//
+// Parameters:
+//   cblock_stride_x/y  step between the origins of neighbouring blocks
+//   cnbins             number of orientation bins per cell
+//   cblock_hist_size   number of floats stored per block in block_hists
+//   img_block_width    number of blocks per row of the block grid
+//   blocks_in_group    number of blocks handled by one work-group
+//   blocks_total       total number of blocks; work-items whose block index is
+//                      not below it read from the buffer start and write nothing
+//   grad_quadstep      row pitch of grad, in floats
+//   qangle_step        row pitch of qangle, in elements
+//   grad, qangle       two magnitudes / two bin indices per x position
+//   gauss_w_lut        two 256-entry tables stored back to back, each addressed
+//                      as a 16x16 grid: gaussian weights first, interpolation
+//                      weights at offset 256
+//   block_hists        output, cblock_hist_size floats per block
+//   smem               local scratch; every block uses cnbins * (48 + 4) floats
+__kernel void compute_hists_lut_kernel(
+    const int cblock_stride_x, const int cblock_stride_y,
+    const int cnbins, const int cblock_hist_size, const int img_block_width,
+    const int blocks_in_group, const int blocks_total,
+    const int grad_quadstep, const int qangle_step,
+    __global const float* grad, __global const QANGLE_TYPE* qangle,
+    __global const float* gauss_w_lut,
+    __global float* block_hists, __local float* smem)
+{
+    const int lx = get_local_id(0);
+    const int lp = lx / 24; /* local group id */
+    const int gid = get_group_id(0) * blocks_in_group + lp;/* global group id */
+    // Row and column of this block in the block grid.
+    const int gidY = gid / img_block_width;
+    const int gidX = gid - gidY * img_block_width;
+
+    // Position of the work-item inside its block: lidX in [0, 24).
+    const int lidX = lx - lp * 24;
+    const int lidY = get_local_id(1);
+
+    const int cell_x = lidX / 12;
+    const int cell_y = lidY;
+    const int cell_thread_x = lidX - cell_x * 12; // column inside the cell, [0, 12)
+
+    // Per-block scratch: cnbins rows of 48 partial sums (4 cells x 12 columns),
+    // followed by the 4 * cnbins reduced histogram values.
+    __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X *
+        CELLS_PER_BLOCK_Y * 12 + CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y);
+    __local float* final_hist = hists + cnbins *
+        (CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12);
+
+    // First sample read by this work-item.
+    const int offset_x = gidX * cblock_stride_x + (cell_x << 2) + cell_thread_x;
+    const int offset_y = gidY * cblock_stride_y + (cell_y << 2);
+
+    // Two elements per x position, hence the shift.  Blocks past blocks_total
+    // are pointed at the start of the buffers; their result is never stored.
+    __global const float* grad_ptr = (gid < blocks_total) ?
+        grad + offset_y * grad_quadstep + (offset_x << 1) : grad;
+    __global const QANGLE_TYPE* qangle_ptr = (gid < blocks_total) ?
+        qangle + offset_y * qangle_step + (offset_x << 1) : qangle;
+
+    // This work-item's private column; consecutive bins are 48 floats apart.
+    __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) +
+        cell_thread_x;
+    for (int bin_id = 0; bin_id < cnbins; ++bin_id)
+        hist[bin_id * 48] = 0.f;
+
+    // Horizontal distances used to address the two lookup tables.
+    const int dist_x = -4 + cell_thread_x - 4 * cell_x;
+    const int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
+
+    const int dist_y_begin = -4 - 4 * lidY;
+    for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
+    {
+        float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
+        QANGLE_TYPE2 bin = (QANGLE_TYPE2) (qangle_ptr[0], qangle_ptr[1]);
+
+        // Advance to the next row.
+        grad_ptr += grad_quadstep;
+        qangle_ptr += qangle_step;
+
+        int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
+
+        // Both tables are 16 wide; the +8 shifts the signed distances to
+        // non-negative indices.
+        int idx = (dist_center_y + 8) * 16 + (dist_center_x + 8);
+        float gaussian = gauss_w_lut[idx];
+        idx = (dist_y + 8) * 16 + (dist_x + 8);
+        float interp_weight = gauss_w_lut[256+idx];
+
+        // Only this work-item touches its column, so no atomics are needed.
+        hist[bin.x * 48] += gaussian * interp_weight * vote.x;
+        hist[bin.y * 48] += gaussian * interp_weight * vote.y;
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Sum the 12 columns of every cell, one bin per iteration: 12 -> 6 -> 3 -> 1.
+    volatile __local float* hist_ = hist;
+    for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48)
+    {
+        if (cell_thread_x < 6)
+            hist_[0] += hist_[6];
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if (cell_thread_x < 3)
+            hist_[0] += hist_[3];
+        // NOTE(review): outside CPU builds there is no barrier between the step
+        // above and the final sum; this presumably relies on the three
+        // work-items involved executing in lockstep -- confirm per device.
+#ifdef CPU
+        barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+        // Cells are stored with cell_x as the major index here, whereas the
+        // scratch columns above use cell_y as the major index.
+        if (cell_thread_x == 0)
+            final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] =
+                hist_[0] + hist_[1] + hist_[2];
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Flat index of the work-item inside its block, [0, 48), used only to
+    // spread the copy below.
+    // NOTE(review): with 48 work-items per block the copy covers at most 48
+    // floats, so cblock_hist_size is presumably <= 48 -- confirm.
+    int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x;
+    if ((tid < cblock_hist_size) && (gid < blocks_total))
+    {
+        __global float* block_hist = block_hists +
+            (gidY * img_block_width + gidX) * cblock_hist_size;
+        block_hist[tid] = final_hist[tid];
+    }
+}
+
+//-------------------------------------------------------------
+//  Normalization of histograms via L2Hys_norm
+//  optimized for the case of 9 bins
+//
+//  Every work-item owns one histogram value, block_hists[get_global_id(0)], and
+//  every 36 consecutive work-items of a work-group share one block histogram.
+//  The kernel divides each value by (L2 norm + 3.6), clips it to `threshold`,
+//  then divides by (L2 norm of the clipped values + 1e-3) and stores the result
+//  in place.
+//
+//  block_hists  histogram values, normalized in place
+//  threshold    upper clip applied after the first normalization
+//  squares      local scratch, one float per work-item of the work-group
+//  NOTE(review): there is no bounds check on gid, so the global size presumably
+//  matches the number of floats in block_hists exactly -- confirm in the caller.
+__kernel void normalize_hists_36_kernel(__global float* block_hists,
+                                        const float threshold, __local float *squares)
+{
+    const int tid = get_local_id(0);
+    const int gid = get_global_id(0);
+    const int bid = tid / 36;      /* block-hist id, (0 - 6) */
+    const int boffset = bid * 36;  /* block-hist offset in the work-group */
+    const int hid = tid - boffset; /* histogram bin id, (0 - 35) */
+
+    float elem = block_hists[gid];
+    squares[tid] = elem * elem;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // First pass: sum of squares of the 36 values, folded 36 -> 18 -> 9.
+    __local float* smem = squares + boffset;
+    float sum = smem[hid];
+    if (hid < 18)
+        smem[hid] = sum = sum + smem[hid + 18];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (hid < 9)
+        smem[hid] = sum = sum + smem[hid + 9];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    // 9 is odd: fold slots 4..7 into 0..3 and leave slot 8 untouched.
+    if (hid < 4)
+        smem[hid] = sum + smem[hid + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    // Every work-item adds up the five remaining partial sums itself, so all 36
+    // work-items end up with the same total.
+    sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8];
+
+    // 3.6f equals 0.1f * 36, the same form normalize_hists_kernel uses with
+    // block_hist_size.
+    elem = elem / (sqrt(sum) + 3.6f);
+    elem = min(elem, threshold);
+
+    // All work-items must have finished reading smem before it is overwritten.
+    barrier(CLK_LOCAL_MEM_FENCE);
+    squares[tid] = elem * elem;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Second pass: same reduction over the clipped values.
+    sum = smem[hid];
+    if (hid < 18)
+      smem[hid] = sum = sum + smem[hid + 18];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (hid < 9)
+        smem[hid] = sum = sum + smem[hid + 9];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (hid < 4)
+        smem[hid] = sum + smem[hid + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8];
+
+    // The small constant keeps the division finite for an all-zero histogram.
+    block_hists[gid] = elem / (sqrt(sum) + 1e-3f);
+}
+
+//-------------------------------------------------------------
+//  Normalization of histograms via L2Hys_norm
+//
+// Adds up the `size` floats stored in smem[0 .. size-1] and returns the total
+// to every work-item of the work-group.
+//
+//   smem  local array holding one value per work-item on entry; it is used as
+//         scratch and its contents are destroyed
+//   size  number of values to add; the folding steps are chosen by comparing
+//         it with powers of two up to 512
+//         NOTE(review): presumably always a power of two -- confirm in the caller
+//
+// The function contains barriers, so every work-item of the work-group has to
+// call it.  The caller also needs a barrier before it writes to smem again,
+// because other work-items may still be reading the result from smem[0].
+//
+// Outside CPU builds the steps below 64 (below 32 after the WAVE_SIZE barrier)
+// run without barriers.
+// NOTE(review): that presumably relies on those work-items executing in
+// lockstep -- confirm for the target device.
+inline float reduce_smem(volatile __local float* smem, int size)
+{
+    unsigned int tid = get_local_id(0);
+    float sum = smem[tid];
+
+    if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+#ifdef CPU
+    if (size >= 64) { if (tid < 32) smem[tid] = sum = sum + smem[tid + 32];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 32) { if (tid < 16) smem[tid] = sum = sum + smem[tid + 16];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 16) { if (tid < 8) smem[tid] = sum = sum + smem[tid + 8];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 8) { if (tid < 4) smem[tid] = sum = sum + smem[tid + 4];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 4) { if (tid < 2) smem[tid] = sum = sum + smem[tid + 2];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+    if (size >= 2) { if (tid < 1) smem[tid] = sum = sum + smem[tid + 1];
+        barrier(CLK_LOCAL_MEM_FENCE); }
+#else
+    if (tid < 32)
+    {
+        if (size >= 64) smem[tid] = sum = sum + smem[tid + 32];
+#if WAVE_SIZE < 32
+    } barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16) {
+#endif
+        if (size >= 32) smem[tid] = sum = sum + smem[tid + 16];
+        if (size >= 16) smem[tid] = sum = sum + smem[tid + 8];
+        if (size >= 8) smem[tid] = sum = sum + smem[tid + 4];
+        if (size >= 4) smem[tid] = sum = sum + smem[tid + 2];
+        if (size >= 2) smem[tid] = sum = sum + smem[tid + 1];
+    }
+#endif
+
+    // The private accumulator `sum` is the complete total only in work-item 0;
+    // every other work-item holds a partial sum.  Work-item 0 has stored the
+    // total in smem[0], so wait for that store and hand the same value to all
+    // work-items.
+    barrier(CLK_LOCAL_MEM_FENCE);
+    return smem[0];
+}
+
+// Two-stage normalization of one block histogram per work-group.
+// The work-group at (get_group_id(0), get_group_id(1)) handles the block with
+// index group_y * img_block_width + group_x.  Work-items whose local id is not
+// below block_hist_size contribute 0 and store nothing.
+//
+//   nthreads         value count passed on to reduce_smem
+//   block_hist_size  number of floats per block histogram
+//   img_block_width  number of blocks per row of the block grid
+//   block_hists      histograms, normalized in place
+//   threshold        upper clip applied between the two stages
+//   squares          local scratch, one float per work-item
+__kernel void normalize_hists_kernel(
+    const int nthreads, const int block_hist_size, const int img_block_width,
+    __global float* block_hists, const float threshold, __local float *squares)
+{
+    const int lid = get_local_id(0);
+    const int block_col = get_group_id(0);
+    const int block_row = get_group_id(1);
+    const bool owns_value = lid < block_hist_size;
+
+    // Slot of this work-item inside its block histogram.
+    __global float* slot = block_hists + (block_row * img_block_width + block_col) *
+        block_hist_size + lid;
+
+    float value = 0.f;
+    if (owns_value)
+        value = slot[0];
+
+    // Stage 1: scale by 1 / (sqrt(reduced squares) + 0.1 * block_hist_size), then clip.
+    squares[lid] = value * value;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    float reduced = reduce_smem(squares, nthreads);
+    float factor = 1.0f / (sqrt(reduced) + 0.1f * block_hist_size);
+    value = min(value * factor, threshold);
+
+    // Nobody may still be reading the scratch when it is refilled.
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Stage 2: scale the clipped value by 1 / (sqrt(reduced squares) + 1e-3).
+    squares[lid] = value * value;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    reduced = reduce_smem(squares, nthreads);
+    factor = 1.0f / (sqrt(reduced) + 1e-3f);
+
+    if (owns_value)
+        slot[0] = value * factor;
+}
+
+//---------------------------------------------------------------------
+//  Linear SVM based classification
+//  48x96 window, 9 bins and default parameters
+//  180 threads, each thread corresponds to a bin in a row
+//
+//  One work-group classifies the window at (get_group_id(0), get_group_id(1)):
+//  it forms the dot product of the window's histogram values with coefs, adds
+//  free_coef and writes (result >= threshold) to labels.
+//
+//    cdescr_width        floats per row of the window descriptor
+//    cdescr_height       number of rows of the window descriptor
+//    cblock_hist_size    floats per block histogram
+//    img_win_width       windows per row, used as the row pitch of labels
+//    img_block_width     block histograms per row of block_hists
+//    win_block_stride_x/y  distance between neighbouring windows, in blocks
+//    block_hists         input block histograms
+//    coefs, free_coef    weights and bias of the linear classifier
+//    threshold           decision threshold
+//    labels              output, one uchar per window (0 or 1)
+//  NOTE(review): tid is used unchecked as an index into products[180] and into
+//  each descriptor row, so the local size presumably has to be exactly 180 and
+//  cdescr_width at least 180 -- confirm in the caller.
+__kernel void classify_hists_180_kernel(
+    const int cdescr_width, const int cdescr_height, const int cblock_hist_size,
+    const int img_win_width, const int img_block_width,
+    const int win_block_stride_x, const int win_block_stride_y,
+    __global const float * block_hists, __global const float* coefs,
+    float free_coef, float threshold, __global uchar* labels)
+{
+    const int tid = get_local_id(0);
+    const int gidX = get_group_id(0);
+    const int gidY = get_group_id(1);
+
+    // First block histogram covered by this window.
+    __global const float* hist = block_hists + (gidY * win_block_stride_y *
+        img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
+
+    float product = 0.f;
+
+    // Each work-item handles column tid of every descriptor row; the next row
+    // starts one full row of block histograms further on in block_hists.
+    for (int i = 0; i < cdescr_height; i++)
+    {
+        product += coefs[i * cdescr_width + tid] *
+            hist[i * img_block_width * cblock_hist_size + tid];
+    }
+
+    __local float products[180];
+
+    products[tid] = product;
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Fold 180 -> 90 -> 45 partial sums.
+    if (tid < 90) products[tid] = product = product + products[tid + 90];
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 45) products[tid] = product = product + products[tid + 45];
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // 45 = 32 + 13: the first 13 work-items absorb slots 32..44, then the
+    // remaining 32 slots are halved down to 2.
+    volatile __local float* smem = products;
+#ifdef CPU
+    if (tid < 13) smem[tid] = product = product + smem[tid + 32];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16) smem[tid] = product = product + smem[tid + 16];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<8) smem[tid] = product = product + smem[tid + 8];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<4) smem[tid] = product = product + smem[tid + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<2) smem[tid] = product = product + smem[tid + 2];
+    barrier(CLK_LOCAL_MEM_FENCE);
+#else
+    // NOTE(review): the steps in this branch are not separated by barriers
+    // (apart from the WAVE_SIZE one); this presumably relies on the first 16
+    // work-items running in lockstep -- confirm for the target device.
+    if (tid < 13)
+    {
+        smem[tid] = product = product + smem[tid + 32];
+    }
+#if WAVE_SIZE < 32
+    barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+    if (tid < 16)
+    {
+        smem[tid] = product = product + smem[tid + 16];
+        smem[tid] = product = product + smem[tid + 8];
+        smem[tid] = product = product + smem[tid + 4];
+        smem[tid] = product = product + smem[tid + 2];
+    }
+#endif
+
+    // Work-item 0 adds the last remaining partial sum and stores the decision.
+    if (tid == 0){
+        product = product + smem[tid + 1];
+        labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
+    }
+}
+
+//---------------------------------------------------------------------
+//  Linear SVM based classification
+//  64x128 window, 9 bins and default parameters
+//  256 threads, 252 of them are used
+//
+//  One work-group classifies the window at (get_group_id(0), get_group_id(1)):
+//  it forms the dot product of the window's histogram values with coefs, adds
+//  free_coef and writes (result >= threshold) to labels.  Work-items with
+//  tid >= cdescr_width contribute 0 to the sum.
+//
+//    cdescr_width        floats per row of the window descriptor
+//    cdescr_height       number of rows of the window descriptor
+//    cblock_hist_size    floats per block histogram
+//    img_win_width       windows per row, used as the row pitch of labels
+//    img_block_width     block histograms per row of block_hists
+//    win_block_stride_x/y  distance between neighbouring windows, in blocks
+//    block_hists         input block histograms
+//    coefs, free_coef    weights and bias of the linear classifier
+//    threshold           decision threshold
+//    labels              output, one uchar per window (0 or 1)
+//  NOTE(review): the reduction reads all NTHREADS slots of products, so the
+//  local size presumably has to be exactly NTHREADS -- confirm in the caller.
+__kernel void classify_hists_252_kernel(
+    const int cdescr_width, const int cdescr_height, const int cblock_hist_size,
+    const int img_win_width, const int img_block_width,
+    const int win_block_stride_x, const int win_block_stride_y,
+    __global const float * block_hists, __global const float* coefs,
+    float free_coef, float threshold, __global uchar* labels)
+{
+    const int tid = get_local_id(0);
+    const int gidX = get_group_id(0);
+    const int gidY = get_group_id(1);
+
+    // First block histogram covered by this window.
+    __global const float* hist = block_hists + (gidY * win_block_stride_y *
+        img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
+
+    float product = 0.f;
+    // Each work-item handles column tid of every descriptor row; the next row
+    // starts one full row of block histograms further on in block_hists.
+    if (tid < cdescr_width)
+    {
+        for (int i = 0; i < cdescr_height; i++)
+            product += coefs[i * cdescr_width + tid] *
+                hist[i * img_block_width * cblock_hist_size + tid];
+    }
+
+    __local float products[NTHREADS];
+
+    products[tid] = product;
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Fold 256 -> 128 -> 64 partial sums.
+    if (tid < 128) products[tid] = product = product + products[tid + 128];
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 64) products[tid] = product = product + products[tid + 64];
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Fold 64 -> 2 partial sums.
+    volatile __local float* smem = products;
+#ifdef CPU
+    if(tid<32) smem[tid] = product = product + smem[tid + 32];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<16) smem[tid] = product = product + smem[tid + 16];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<8) smem[tid] = product = product + smem[tid + 8];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<4) smem[tid] = product = product + smem[tid + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<2) smem[tid] = product = product + smem[tid + 2];
+    barrier(CLK_LOCAL_MEM_FENCE);
+#else
+    // NOTE(review): the steps in this branch are not separated by barriers
+    // (apart from the WAVE_SIZE one); this presumably relies on the
+    // participating work-items running in lockstep -- confirm per device.
+    if (tid < 32)
+    {
+        smem[tid] = product = product + smem[tid + 32];
+    // When WAVE_SIZE < 32 the block above is closed here, a barrier is
+    // inserted, and the remaining steps run in a new "tid < 16" block;
+    // otherwise all steps stay inside the "tid < 32" block.
+#if WAVE_SIZE < 32
+    } barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16) {
+#endif
+        smem[tid] = product = product + smem[tid + 16];
+        smem[tid] = product = product + smem[tid + 8];
+        smem[tid] = product = product + smem[tid + 4];
+        smem[tid] = product = product + smem[tid + 2];
+    }
+#endif
+    // Work-item 0 adds the last remaining partial sum and stores the decision.
+    if (tid == 0){
+        product = product + smem[tid + 1];
+        labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
+    }
+}
+
+//---------------------------------------------------------------------
+//  Linear SVM based classification
+//  256 threads
+//
+//  Version for arbitrary descriptor sizes.  One work-group classifies the
+//  window at (get_group_id(0), get_group_id(1)): the work-items share the
+//  cdescr_size descriptor values in steps of NTHREADS, the partial dot
+//  products are summed in local memory, and work-item 0 writes
+//  (sum + free_coef >= threshold) to labels.
+//
+//    cdescr_size         total number of floats in a window descriptor
+//    cdescr_width        floats per row of the window descriptor
+//    cblock_hist_size    floats per block histogram
+//    img_win_width       windows per row, used as the row pitch of labels
+//    img_block_width     block histograms per row of block_hists
+//    win_block_stride_x/y  distance between neighbouring windows, in blocks
+//    block_hists         input block histograms
+//    coefs, free_coef    weights and bias of the linear classifier
+//    threshold           decision threshold
+//    labels              output, one uchar per window (0 or 1)
+//  NOTE(review): the reduction reads all NTHREADS slots of products, so the
+//  local size presumably has to be exactly NTHREADS -- confirm in the caller.
+__kernel void classify_hists_kernel(
+    const int cdescr_size, const int cdescr_width, const int cblock_hist_size,
+    const int img_win_width, const int img_block_width,
+    const int win_block_stride_x, const int win_block_stride_y,
+    __global const float * block_hists, __global const float* coefs,
+    float free_coef, float threshold, __global uchar* labels)
+{
+    const int tid = get_local_id(0);
+    const int gidX = get_group_id(0);
+    const int gidY = get_group_id(1);
+
+    // First block histogram covered by this window.
+    __global const float* hist = block_hists + (gidY * win_block_stride_y *
+        img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
+
+    float product = 0.f;
+    for (int i = tid; i < cdescr_size; i += NTHREADS)
+    {
+        // Split the flat descriptor index into a row and an offset inside the
+        // row; rows are one full row of block histograms apart in block_hists.
+        int offset_y = i / cdescr_width;
+        int offset_x = i - offset_y * cdescr_width;
+        product += coefs[i] *
+            hist[offset_y * img_block_width * cblock_hist_size + offset_x];
+    }
+
+    __local float products[NTHREADS];
+
+    products[tid] = product;
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Fold 256 -> 128 -> 64 partial sums.
+    if (tid < 128) products[tid] = product = product + products[tid + 128];
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 64) products[tid] = product = product + products[tid + 64];
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Fold 64 -> 2 partial sums.
+    volatile __local float* smem = products;
+#ifdef CPU
+    if(tid<32) smem[tid] = product = product + smem[tid + 32];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<16) smem[tid] = product = product + smem[tid + 16];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<8) smem[tid] = product = product + smem[tid + 8];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<4) smem[tid] = product = product + smem[tid + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<2) smem[tid] = product = product + smem[tid + 2];
+    barrier(CLK_LOCAL_MEM_FENCE);
+#else
+    // NOTE(review): the steps in this branch are not separated by barriers
+    // (apart from the WAVE_SIZE one); this presumably relies on the
+    // participating work-items running in lockstep -- confirm per device.
+    if (tid < 32)
+    {
+        smem[tid] = product = product + smem[tid + 32];
+    // When WAVE_SIZE < 32 the block above is closed here, a barrier is
+    // inserted, and the remaining steps run in a new "tid < 16" block;
+    // otherwise all steps stay inside the "tid < 32" block.
+#if WAVE_SIZE < 32
+    } barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16) {
+#endif
+        smem[tid] = product = product + smem[tid + 16];
+        smem[tid] = product = product + smem[tid + 8];
+        smem[tid] = product = product + smem[tid + 4];
+        smem[tid] = product = product + smem[tid + 2];
+    }
+#endif
+    // Work-item 0 adds the last remaining partial sum and stores the decision.
+    if (tid == 0){
+        smem[tid] = product = product + smem[tid + 1];
+        labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
+    }
+}
+
+//----------------------------------------------------------------------------
+// Extract descriptors
+
+// Copies the descriptor of one detection window out of block_hists, keeping
+// the row-by-row order in which the values are stored there.
+// One work-group handles one window: get_group_id(0) / get_group_id(1) are the
+// window's column / row, and the work-items share the cdescr_size floats of
+// the descriptor in steps of NTHREADS.
+//
+//   cblock_hist_size      floats per block histogram
+//   descriptors_quadstep  distance between two output descriptors, in floats
+//   cdescr_size           floats per window descriptor
+//   cdescr_width          floats per row of the window descriptor
+//   img_block_width       block histograms per row of block_hists
+//   win_block_stride_x/y  distance between neighbouring windows, in blocks
+//   block_hists           input block histograms
+//   descriptors           output, one descriptor per window
+__kernel void extract_descrs_by_rows_kernel(
+    const int cblock_hist_size, const int descriptors_quadstep,
+    const int cdescr_size, const int cdescr_width, const int img_block_width,
+    const int win_block_stride_x, const int win_block_stride_y,
+    __global const float* block_hists, __global float* descriptors)
+{
+    const int win_col = get_group_id(0);
+    const int win_row = get_group_id(1);
+
+    // First block histogram covered by this window.
+    __global const float* src = block_hists + (win_row * win_block_stride_y *
+        img_block_width + win_col * win_block_stride_x) * cblock_hist_size;
+
+    // Start of this window's descriptor in the output buffer.
+    __global float* dst = descriptors +
+        (win_row * get_num_groups(0) + win_col) * descriptors_quadstep;
+
+    int i = get_local_id(0);
+    while (i < cdescr_size)
+    {
+        const int line = i / cdescr_width;  // descriptor row
+        const int pos = i % cdescr_width;   // offset inside that row
+        // Descriptor rows are one full row of block histograms apart in src.
+        dst[i] = src[line * img_block_width * cblock_hist_size + pos];
+        i += NTHREADS;
+    }
+}
+
+// Copies the descriptor of one detection window out of block_hists while
+// transposing the block order: the block at column x, row y of the window is
+// written to output block position x * cnblocks_win_y + y.
+// One work-group handles one window: get_group_id(0) / get_group_id(1) are the
+// window's column / row, and the work-items share the cdescr_size floats of
+// the descriptor in steps of NTHREADS.
+//
+//   cblock_hist_size      floats per block histogram
+//   descriptors_quadstep  distance between two output descriptors, in floats
+//   cdescr_size           floats per window descriptor
+//   cnblocks_win_x/y      number of blocks per window along x / y
+//   img_block_width       block histograms per row of block_hists
+//   win_block_stride_x/y  distance between neighbouring windows, in blocks
+//   block_hists           input block histograms
+//   descriptors           output, one descriptor per window
+__kernel void extract_descrs_by_cols_kernel(
+    const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size,
+    const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width,
+    const int win_block_stride_x, const int win_block_stride_y,
+    __global const float* block_hists, __global float* descriptors)
+{
+    const int win_col = get_group_id(0);
+    const int win_row = get_group_id(1);
+
+    // First block histogram covered by this window.
+    __global const float* src = block_hists +  (win_row * win_block_stride_y *
+        img_block_width + win_col * win_block_stride_x) * cblock_hist_size;
+
+    // Start of this window's descriptor in the output buffer.
+    __global float* dst = descriptors +
+        (win_row * get_num_groups(0) + win_col) * descriptors_quadstep;
+
+    int i = get_local_id(0);
+    while (i < cdescr_size)
+    {
+        // Which block of the window, and which value inside that block.
+        const int block = i / cblock_hist_size;
+        const int inner = i % cblock_hist_size;
+
+        // Block coordinates inside the window (row-major on the input side).
+        const int by = block / cnblocks_win_x;
+        const int bx = block % cnblocks_win_x;
+
+        const int dst_index = (bx * cnblocks_win_y + by) * cblock_hist_size + inner;
+        const int src_index = (by * img_block_width  + bx) * cblock_hist_size + inner;
+        dst[dst_index] = src[src_index];
+
+        i += NTHREADS;
+    }
+}
+
+//----------------------------------------------------------------------------
+// Gradients computation
+
+// Gradient magnitude and quantized orientation for a 4-channel 8-bit image.
+// Work-items run along x (get_global_id(0)); get_group_id(1) selects the image
+// row.  Of the first three channels, the one with the largest gradient
+// magnitude provides the result.  For every pixel two neighbouring orientation
+// bins are written to qangle and the magnitude, split between them, to grad.
+//
+//   height, width   image size in pixels
+//   img_step        row pitch of img, in uchar4 elements
+//   grad_quadstep   row pitch factor of grad (the index is doubled below)
+//   qangle_step     row pitch factor of qangle (the index is doubled below)
+//   angle_scale     factor turning the shifted atan2() angle into bin units
+//   correct_gamma   when 1, differences are taken between square roots
+//   cnbins          number of orientation bins
+__kernel void compute_gradients_8UC4_kernel(
+    const int height, const int width,
+    const int img_step, const int grad_quadstep, const int qangle_step,
+    const __global uchar4 * img, __global float * grad, __global QANGLE_TYPE * qangle,
+    const float angle_scale, const char correct_gamma, const int cnbins)
+{
+    const int col = get_global_id(0);
+    const int lid = get_local_id(0);
+    const int group_w = get_local_size(0);
+    const int line = get_group_id(1);
+
+    __global const uchar4* src = img + line * img_step;
+
+    // Three channel planes of NTHREADS + 2 floats each: one slot per
+    // work-item plus one extra slot at either end for the neighbours.
+    __local float sh_row[(NTHREADS + 2) * 3];
+    const int pitch = NTHREADS + 2;
+
+    // Work-items past the right edge load pixel width - 2 instead.
+    uchar4 px = src[(col < width) ? col : (width - 2)];
+
+    sh_row[lid + 1] = px.x;
+    sh_row[lid + 1 + pitch] = px.y;
+    sh_row[lid + 1 + 2 * pitch] = px.z;
+
+    // Left neighbour of the group; at col == 0 pixel 1 is used.
+    if (lid == 0)
+    {
+        px = src[max(col - 1, 1)];
+        sh_row[0] = px.x;
+        sh_row[pitch] = px.y;
+        sh_row[2 * pitch] = px.z;
+    }
+
+    // Right neighbour of the group, limited to pixel width - 2.
+    if (lid == group_w - 1)
+    {
+        px = src[min(col + 1, width - 2)];
+        sh_row[group_w + 1] = px.x;
+        sh_row[group_w + 1 + pitch] = px.y;
+        sh_row[group_w + 1 + 2 * pitch] = px.z;
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // No barrier follows, so work-items outside the image can stop here.
+    if (col >= width)
+        return;
+
+    const float4 left = (float4) (sh_row[lid], sh_row[lid + pitch],
+        sh_row[lid + 2 * pitch], 0);
+    const float4 right = (float4) (sh_row[lid + 2], sh_row[lid + 2 + pitch],
+        sh_row[lid + 2 + 2 * pitch], 0);
+
+    float4 dx;
+    if (correct_gamma != 1)
+        dx = right - left;
+    else
+        dx = sqrt(right) - sqrt(left);
+
+    // The vertical difference stays 0 on the first and last image row.
+    float4 dy = (float4) 0.f;
+    if (line > 0 && line < height - 1)
+    {
+        const float4 above = convert_float4(img[(line - 1) * img_step + col].xyzw);
+        const float4 below = convert_float4(img[(line + 1) * img_step + col].xyzw);
+
+        if (correct_gamma != 1)
+            dy = below - above;
+        else
+            dy = sqrt(below) - sqrt(above);
+    }
+
+    // Pick the channel (x, y or z) with the largest magnitude; ties keep the
+    // earlier channel.
+    const float4 mag = hypot(dx, dy);
+    float top_mag = mag.x;
+    float top_dx = dx.x;
+    float top_dy = dy.x;
+
+    if (mag.y > top_mag)
+    {
+        top_mag = mag.y;
+        top_dx = dx.y;
+        top_dy = dy.y;
+    }
+
+    if (mag.z > top_mag)
+    {
+        top_mag = mag.z;
+        top_dx = dx.z;
+        top_dy = dy.z;
+    }
+
+    // Position on the bin axis; the integer part is the lower bin and the
+    // fractional part the share that goes to the next bin.
+    float frac = (atan2(top_dy, top_dx) + CV_PI_F) * angle_scale - 0.5f;
+    int bin = (int)floor(frac);
+    frac -= bin;
+    bin = (bin + cnbins) % cnbins;
+
+    // Two output elements per pixel.
+    const int q_at = (line * qangle_step + col) << 1;
+    const int g_at = (line * grad_quadstep + col) << 1;
+
+    qangle[q_at] = bin;
+    qangle[q_at + 1] = (bin + 1) % cnbins;
+    grad[g_at] = top_mag * (1.f - frac);
+    grad[g_at + 1] = top_mag * frac;
+}
+
+// Gradient magnitude and quantized orientation for a single-channel 8-bit
+// image.  Work-items run along x (get_global_id(0)); get_group_id(1) selects
+// the image row.  For every pixel two neighbouring orientation bins are
+// written to qangle and the magnitude, split between them, to grad.
+//
+//   height, width   image size in pixels
+//   img_step        row pitch of img, in elements
+//   grad_quadstep   row pitch factor of grad (the index is doubled below)
+//   qangle_step     row pitch factor of qangle (the index is doubled below)
+//   angle_scale     factor turning the shifted atan2() angle into bin units
+//   correct_gamma   when 1, differences are taken between square roots
+//   cnbins          number of orientation bins
+__kernel void compute_gradients_8UC1_kernel(
+    const int height, const int width,
+    const int img_step, const int grad_quadstep, const int qangle_step,
+    __global const uchar * img, __global float * grad, __global QANGLE_TYPE * qangle,
+    const float angle_scale, const char correct_gamma, const int cnbins)
+{
+    const int col = get_global_id(0);
+    const int lid = get_local_id(0);
+    const int group_w = get_local_size(0);
+    const int line = get_group_id(1);
+
+    __global const uchar* src = img + line * img_step;
+
+    // One slot per work-item plus one extra slot at either end.
+    __local float sh_row[NTHREADS + 2];
+
+    // Work-items past the right edge load pixel width - 2 instead.
+    sh_row[lid + 1] = src[(col < width) ? col : (width - 2)];
+
+    // Left neighbour of the group; at col == 0 pixel 1 is used.
+    if (lid == 0)
+        sh_row[0] = src[max(col - 1, 1)];
+
+    // Right neighbour of the group, limited to pixel width - 2.
+    if (lid == group_w - 1)
+        sh_row[group_w + 1] = src[min(col + 1, width - 2)];
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // No barrier follows, so work-items outside the image can stop here.
+    if (col >= width)
+        return;
+
+    float dx;
+    if (correct_gamma != 1)
+        dx = sh_row[lid + 2] - sh_row[lid];
+    else
+        dx = sqrt(sh_row[lid + 2]) - sqrt(sh_row[lid]);
+
+    // The vertical difference stays 0 on the first and last image row.
+    float dy = 0.f;
+    if (line > 0 && line < height - 1)
+    {
+        const float below = (float) img[ (line + 1) * img_step + col ];
+        const float above = (float) img[ (line - 1) * img_step + col ];
+        if (correct_gamma != 1)
+            dy = below - above;
+        else
+            dy = sqrt(below) - sqrt(above);
+    }
+
+    const float mag = hypot(dx, dy);
+
+    // Position on the bin axis; the integer part is the lower bin and the
+    // fractional part the share that goes to the next bin.
+    float frac = (atan2(dy, dx) + CV_PI_F) * angle_scale - 0.5f;
+    int bin = (int)floor(frac);
+    frac -= bin;
+    bin = (bin + cnbins) % cnbins;
+
+    // Two output elements per pixel.
+    const int q_at = (line * qangle_step + col) << 1;
+    const int g_at = (line * grad_quadstep + col) << 1;
+
+    qangle[q_at] = bin;
+    qangle[q_at + 1] = (bin + 1) % cnbins;
+    grad[g_at] = mag * (1.f - frac);
+    grad[g_at + 1] = mag * frac;
+}
--- a/Lib/opencv/sources/modules/objdetect/src/precomp.hpp
+++ b/Lib/opencv/sources/modules/objdetect/src/precomp.hpp
@@ -0,0 +1,53 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include "opencv2/objdetect.hpp"
+#include "opencv2/imgproc.hpp"
+
+#include "opencv2/core/utility.hpp"
+#include "opencv2/core/ocl.hpp"
+#include "opencv2/core/private.hpp"
+
+#endif
--- a/Lib/opencv/sources/modules/objdetect/src/qrcode.cpp
+++ b/Lib/opencv/sources/modules/objdetect/src/qrcode.cpp