// See legal notice in Copying.txt for more information

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
// http://www.gnu.org/copyleft/gpl.html .

#include "PlaneOfBlocks.h"

#include <algorithm>

#include "Padding.h"


// Builds one plane (hierarchy level) of the block grid.
//
//   _nBlkX, _nBlkY  number of blocks horizontally / vertically
//   _nBlkSize       luma block edge; 16 and 4 get dedicated kernels, every
//                   other value is treated like 8
//   _nPel           sub-pixel precision factor (its log2 is cached)
//   _nLevel         log2 of this plane's scale (0 is written out with a
//                   compensation section by SearchMVs)
//   _nFlags         MOTION_* bit flags
//   _nOverlap       overlap, in pixels, between neighbouring blocks
//   _yRatioUV       vertical luma/chroma ratio; 2 selects the half-height
//                   chroma kernels, any other value the full-height ones
//
// Allocates the per-block vector array and binds the SAD / variance / luma /
// blit function pointers matching the block size and CPU flags.
PlaneOfBlocks::PlaneOfBlocks(int _nBlkX, int _nBlkY, int _nBlkSize, int _nPel, int _nLevel, int _nFlags, int _nOverlap, int _yRatioUV)
{
   /* block grid geometry */
   nBlkX = _nBlkX;
   nBlkY = _nBlkY;
   nBlkCount = nBlkX * nBlkY;
   nBlkSize = _nBlkSize;
   nOverlap = _nOverlap;
   yRatioUV = _yRatioUV;

   /* precision and hierarchy level */
   nPel = _nPel;
   nLogPel = ilog2(nPel);
   nLogScale = _nLevel;
   nScale = iexp2(nLogScale);

   /* behaviour flags */
   nFlags = _nFlags;
   smallestPlane = (nFlags & MOTION_SMALLEST_PLANE) != 0;
   mmx = (nFlags & MOTION_USE_MMX) != 0;
   isse = (nFlags & MOTION_USE_ISSE) != 0;
   chroma = (nFlags & MOTION_USE_CHROMA_MOTION) != 0;

   globalMVPredictor = zeroMV;

   /* one motion vector per block */
   vectors = new VECTOR[nBlkCount];

   /* kernel selection: first by block size, then by CPU capability */
   const bool halfHeightChroma = (yRatioUV == 2);

   if ( nBlkSize == 16 )
   {
      if ( isse )
      {
         SAD = Sad16x16_iSSE;
         VAR = Var16x16_iSSE;
         LUMA = Luma16x16_iSSE;
         BLITLUMA = Copy16_mmx;
         if ( halfHeightChroma )
         {
            BLITCHROMA = Copy8_mmx;
            SADCHROMA = Sad8x8_iSSE;
         }
         else
         {
            BLITCHROMA = Copy8x16_mmx;
            SADCHROMA = Sad8x16_iSSE;
         }
      }
      else
      {
         SAD = Sad_C<16>;
         VAR = Var_C<16>;
         LUMA = Luma_C<16>;
         BLITLUMA = Copy_C<16>;
         if ( halfHeightChroma )
         {
            BLITCHROMA = Copy_C<8>;
            SADCHROMA = Sad_C<8>;
         }
         else
         {
            BLITCHROMA = Copy_C<8,16>;
            SADCHROMA = Sad_C<8,16>;
         }
      }
   }
   else if ( nBlkSize == 4 )
   {
      if ( isse )
      {
         SAD = Sad4x4_iSSE;
         VAR = Var4x4_iSSE;
         LUMA = Luma4x4_iSSE;
         BLITLUMA = Copy4_mmx;
         if ( halfHeightChroma )
         {
            BLITCHROMA = Copy2_mmx;
            SADCHROMA = Sad_C<2>;
         }
         else
         {
            BLITCHROMA = Copy_mmx<2, 4>;
            SADCHROMA = Sad_C<2,4>;
         }
      }
      else
      {
         SAD = Sad_C<4>;
         VAR = Var_C<4>;
         LUMA = Luma_C<4>;
         BLITLUMA = Copy_C<4>;
         if ( halfHeightChroma )
         {
            BLITCHROMA = Copy_C<2>;
            SADCHROMA = Sad_C<2>;
         }
         else
         {
            BLITCHROMA = Copy_C<2,4>;
            SADCHROMA = Sad_C<2,4>;
         }
      }
   }
   else
   {
      // 8x8 blocks, also the fallback for any unrecognised block size
      if ( isse )
      {
         SAD = Sad8x8_iSSE;
         VAR = Var8x8_iSSE;
         LUMA = Luma8x8_iSSE;
         BLITLUMA = Copy8_mmx;
         if ( halfHeightChroma )
         {
            BLITCHROMA = Copy4_mmx;
            SADCHROMA = Sad4x4_iSSE;
         }
         else
         {
            BLITCHROMA = Copy4x8_mmx;
            SADCHROMA = Sad4x8_iSSE;
         }
      }
      else
      {
         SAD = Sad_C<8>;
         VAR = Var_C<8>;
         LUMA = Luma_C<8>;
         BLITLUMA = Copy_C<8>;
         if ( halfHeightChroma )
         {
            BLITCHROMA = Copy_C<4>;
            SADCHROMA = Sad_C<4>;
         }
         else
         {
            BLITCHROMA = Copy_C<4,8>;
            SADCHROMA = Sad_C<4,8>;
         }
      }
   }

   // Without chroma motion the chroma SAD is bound to the zero-sized kernel.
   if ( !chroma )
      SADCHROMA = Sad_C<0>;
}

// Destructor: releases the per-block vector array that the constructor
// allocated with new[].
PlaneOfBlocks::~PlaneOfBlocks()
{
   delete[] vectors;
}

// Runs the motion search for every block of this plane and serialises the
// results into 'out'.
//
//   _pSrcFrame, _pRefFrame  source frame and reference frame to match against
//   st, stp                 search type bit mask and its parameter
//   lambda                  coherence weight; divided by nPel*nPel here and
//                           optionally scaled by the plane scale (see plevel)
//   lsad                    SAD limit above which lambda is dropped
//   pnew                    penalty for a new vector
//   plevel                  0: lambda as is, 1: lambda*nScale,
//                           2: lambda*nScale*nScale
//   flags                   MOTION_* flags, OR-ed into nFlags
//   out                     destination array:
//                             out[0]   = size of the vector section (header)
//                             then 6 ints per block: x, y, sad, var, luma src,
//                             luma ref
//                             for nLogScale == 0 only: a second section with
//                             its own size slot followed by the compensated
//                             Y, U and V planes
//   globalMVec              global motion predictor (must not be null)
//
// Blocks are visited in raster order; the chosen vector of each block is also
// kept in 'vectors' by PseudoEPZSearch.
void PlaneOfBlocks::SearchMVs(MVFrame *_pSrcFrame, MVFrame *_pRefFrame,
                              SearchType st, int stp, int lambda, int lsad, int pnew,
                              int plevel, int flags, int *out, VECTOR * globalMVec)
{
   globalMVPredictor = *globalMVec;

   const bool calcSrcLuma = (flags & MOTION_CALC_SRC_LUMA) != 0;
   const bool calcRefLuma = (flags & MOTION_CALC_REF_LUMA) != 0;
   const bool calcSrcVar = (flags & MOTION_CALC_VAR) != 0;

   // Distance between the origins of two neighbouring blocks.
   const int nBlkStep = nBlkSize - nOverlap;
   const int nOutPitchY = nBlkX * nBlkStep + nOverlap;
   const int nOutPitchUV = nOutPitchY / 2;

   // write the plane's header
   WriteHeaderToArray(out);

   int *pBlkData = out + 1;

   // The compensation section only exists on the finest plane. The pointers
   // are null otherwise and are neither advanced nor dereferenced, so no
   // indeterminate pointer value is ever used.
   const bool hasCompensation = (nLogScale == 0);
   Uint8 *pMoCompDataY = 0;
   Uint8 *pMoCompDataU = 0;
   Uint8 *pMoCompDataV = 0;
   if ( hasCompensation )
   {
      const int sizeY = nOutPitchY * (nBlkY * nBlkStep + nOverlap);
      const int sizeUV = sizeY / (2 * yRatioUV);
      pMoCompDataY = reinterpret_cast<Uint8 *>(out + out[0] + 1);
      pMoCompDataU = pMoCompDataY + sizeY;
      pMoCompDataV = pMoCompDataU + sizeUV;
      out += out[0];
      // section size in ints (bytes rounded up to whole ints) plus the size slot
      out[0] = (sizeY + sizeUV + sizeUV + 3) / 4 + 1;
   }

   nFlags |= flags;

   pSrcFrame = _pSrcFrame;
   pRefFrame = _pRefFrame;

   // Current block position in each plane; starts at the padding offset.
   x[0] = pSrcFrame->GetPlane(YPLANE)->GetHPadding();
   x[1] = pSrcFrame->GetPlane(UPLANE)->GetHPadding();
   x[2] = pSrcFrame->GetPlane(VPLANE)->GetHPadding();
   y[0] = pSrcFrame->GetPlane(YPLANE)->GetVPadding();
   y[1] = pSrcFrame->GetPlane(UPLANE)->GetVPadding();
   y[2] = pSrcFrame->GetPlane(VPLANE)->GetVPadding();

   blkx = 0;
   blky = 0;

   nSrcPitch[0] = pSrcFrame->GetPlane(YPLANE)->GetPitch();
   nSrcPitch[1] = pSrcFrame->GetPlane(UPLANE)->GetPitch();
   nSrcPitch[2] = pSrcFrame->GetPlane(VPLANE)->GetPitch();
   nRefPitch[0] = pRefFrame->GetPlane(YPLANE)->GetPitch();
   nRefPitch[1] = pRefFrame->GetPlane(UPLANE)->GetPitch();
   nRefPitch[2] = pRefFrame->GetPlane(VPLANE)->GetPitch();

   searchType = st;
   nSearchParam = stp;
   lambda = lambda / (nPel * nPel);

   // Functions using float must not be used here (MMX state is only cleared
   // by the emms at the end of this function).
   for ( blkIdx = 0; blkIdx < nBlkCount; blkIdx++ )
   {
      PROFILE_START(MOTION_PROFILE_ME);

      pSrc[0] = pSrcFrame->GetPlane(YPLANE)->GetAbsolutePelPointer(x[0], y[0]);
      pSrc[1] = pSrcFrame->GetPlane(UPLANE)->GetAbsolutePelPointer(x[1], y[1]);
      pSrc[2] = pSrcFrame->GetPlane(VPLANE)->GetAbsolutePelPointer(x[2], y[2]);

      // nLambda is reset for every block because FetchPredictors may zero it.
      if ( blky == 0 )
      {
         nLambda = 0; // no coherence weighting on the first block row
      }
      else if ( plevel == 1 )
      {
         nLambda = lambda * nScale; // scale lambda - Fizick
      }
      else if ( plevel == 2 )
      {
         nLambda = lambda * nScale * nScale;
      }
      else
      {
         nLambda = lambda; // plevel == 0
      }

      penaltyNew = pnew; // penalty for new vector
      LSAD = lsad;       // SAD limit for lambda using
      // may be they must be scaled by nPel ?

      /* computes search boundaries */
      nDxMax = nPel * (pSrcFrame->GetPlane(YPLANE)->GetExtendedWidth() - x[0] - pSrcFrame->GetPlane(YPLANE)->GetHPadding());
      nDyMax = nPel * (pSrcFrame->GetPlane(YPLANE)->GetExtendedHeight() - y[0] - pSrcFrame->GetPlane(YPLANE)->GetVPadding());
      nDxMin = -nPel * x[0];
      nDyMin = -nPel * y[0];

      /* search the mv */
      predictor = ClipMV(vectors[blkIdx]);

      PseudoEPZSearch();

      /* write the results */
      pBlkData[0] = bestMV.x;
      pBlkData[1] = bestMV.y;
      pBlkData[2] = bestMV.sad;
      PROFILE_STOP(MOTION_PROFILE_ME);

      PROFILE_START(MOTION_PROFILE_LUMA_VAR);
      if ( calcSrcVar )
      {
         // VAR returns the variance and stores a second value in slot 4
         pBlkData[3] = VAR(pSrc[0], nSrcPitch[0], pBlkData + 4);
      }
      else if ( calcSrcLuma )
      {
         pBlkData[3] = 0;
         pBlkData[4] = LUMA(pSrc[0], nSrcPitch[0]);
      }
      else
      {
         pBlkData[3] = pBlkData[4] = 0;
      }

      if ( calcRefLuma )
         pBlkData[5] = LUMA(GetRefBlock(bestMV.x, bestMV.y), nRefPitch[0]);
      else
         pBlkData[5] = 0;
      pBlkData += 6;
      PROFILE_STOP(MOTION_PROFILE_LUMA_VAR);

      PROFILE_START(MOTION_PROFILE_COMPENSATION);
      if ( hasCompensation )
      {
         if ( nFlags & MOTION_COMPENSATE_LUMA )
            BLITLUMA(pMoCompDataY, nOutPitchY, GetRefBlock(bestMV.x, bestMV.y), nRefPitch[0]);
         if ( nFlags & MOTION_COMPENSATE_CHROMA_U )
            BLITCHROMA(pMoCompDataU, nOutPitchUV, GetRefBlockU(bestMV.x, bestMV.y), nRefPitch[1]);
         if ( nFlags & MOTION_COMPENSATE_CHROMA_V )
            BLITCHROMA(pMoCompDataV, nOutPitchUV, GetRefBlockV(bestMV.x, bestMV.y), nRefPitch[2]);
      }
      PROFILE_STOP(MOTION_PROFILE_COMPENSATION);

      /* increment indexes & pointers */
      blkx++;
      x[0] += nBlkStep;
      x[1] += (nBlkStep / 2);
      x[2] += (nBlkStep / 2);
      if ( hasCompensation )
      {
         pMoCompDataY += nBlkStep;
         pMoCompDataU += nBlkStep / 2;
         pMoCompDataV += nBlkStep / 2;
      }

      if ( blkx == nBlkX )
      {
         // end of a block row: rewind horizontally, step down one row
         blkx = 0;
         x[0] = pSrcFrame->GetPlane(YPLANE)->GetHPadding();
         x[1] = pSrcFrame->GetPlane(UPLANE)->GetHPadding();
         x[2] = pSrcFrame->GetPlane(VPLANE)->GetHPadding();
         blky++;
         y[0] += nBlkStep;
         y[1] += (nBlkStep / yRatioUV);
         y[2] += (nBlkStep / yRatioUV);
         if ( hasCompensation )
         {
            pMoCompDataY += nBlkStep * nOutPitchY - nBlkStep * nBlkX;
            pMoCompDataU += (nBlkStep / yRatioUV) * nOutPitchUV - (nBlkStep / 2) * nBlkX;
            pMoCompDataV += (nBlkStep / yRatioUV) * nOutPitchUV - (nBlkStep / 2) * nBlkX;
         }
      }
   }

   __asm { emms }
}

void PlaneOfBlocks::InterpolatePrediction(const PlaneOfBlocks &pob)
{
   int normFactor = 3 - nLogPel + pob.nLogPel;
	int mulFactor = (normFactor < 0) ? -normFactor : 0;
	normFactor = (normFactor < 0) ? 0 : normFactor;

   for ( int l = 0, index = 0; l < nBlkY; l++ )
	{
		for ( int k = 0; k < nBlkX; k++, index++ )
		{
			VECTOR v1, v2, v3, v4;
			int i = k; 
			int j = l;
			if ( i == 2 * pob.nBlkX ) i--;
			if ( j == 2 * pob.nBlkY ) j--;
			int offy = -1 + 2 * ( j % 2);
			int offx = -1 + 2 * ( i % 2);
	
			if (( i == 0 ) || (i == 2 * pob.nBlkX - 1))
			{
				if (( j == 0 ) || ( j == 2 * pob.nBlkY - 1))
				{
					v1 = v2 = v3 = v4 = pob.vectors[i / 2 + (j / 2) * pob.nBlkX];
				}
				else 
				{
					v1 = v2 = pob.vectors[i / 2 + (j / 2) * pob.nBlkX];
					v3 = v4 = pob.vectors[i / 2 + (j / 2 + offy) * pob.nBlkX];
				}
			}
			else if (( j == 0 ) || ( j >= 2 * pob.nBlkY - 1))
			{
				v1 = v2 = pob.vectors[i / 2 + (j / 2) * pob.nBlkX];
				v3 = v4 = pob.vectors[i / 2 + offx + (j / 2) * pob.nBlkX];
			}
			else
			{
				v1 = pob.vectors[i / 2 + (j / 2) * pob.nBlkX];
				v2 = pob.vectors[i / 2 + offx + (j / 2) * pob.nBlkX];
				v3 = pob.vectors[i / 2 + (j / 2 + offy) * pob.nBlkX];
				v4 = pob.vectors[i / 2 + offx + (j / 2 + offy) * pob.nBlkX];
			}

			vectors[index].x = 9 * v1.x + 3 * v2.x + 3 * v3.x + v4.x;
			vectors[index].y = 9 * v1.y + 3 * v2.y + 3 * v3.y + v4.y;
         vectors[index].sad = 9 * v1.sad + 3 * v2.sad + 3 * v3.sad + v4.sad + 8;

			vectors[index].x = (vectors[index].x >> normFactor) << mulFactor;
			vectors[index].y = (vectors[index].y >> normFactor) << mulFactor;
         vectors[index].sad = vectors[index].sad >> 4;
		}
	}
}

// Writes the header of this plane's vector section: array[0] receives the
// section length in ints, i.e. 6 ints per block plus 1 for the length slot
// itself.
void PlaneOfBlocks::WriteHeaderToArray(int *array)
{
    array[0] = nBlkCount * 6 + 1;
}

// Writes an all-zero result for this plane into 'array', using the same
// layout SearchMVs produces: a size slot followed by 6 zeroed ints per block
// and, on the finest plane (nLogScale == 0) only, a second size slot followed
// by a zeroed compensation area (Y + U + V bytes rounded up to whole ints).
// Returns the number of ints the plane occupies (see GetArraySize).
int PlaneOfBlocks::WriteDefaultToArray(int *array)
{
   const int nVectorInts = nBlkCount * 6;
   array[0] = nVectorInts + 1;
   memset(array + 1, 0, nVectorInts * sizeof(int));

   if ( nLogScale != 0 )
      return GetArraySize();

   // Compensation section starts right after the vector section.
   int *pComp = array + array[0];

   const int nStep = nBlkSize - nOverlap;
   const int nLumaBytes = (nBlkX * nStep + nOverlap) * (nBlkY * nStep + nOverlap);
   const int nChromaBytes = nLumaBytes / (2 * yRatioUV);
   const int nCompInts = (nLumaBytes + nChromaBytes + nChromaBytes + 3) / 4;

   pComp[0] = nCompInts + 1;
   memset(pComp + 1, 0, nCompInts * sizeof(int));

   return GetArraySize();
}

// Returns how many ints this plane occupies in the output array:
//   1 size slot + 6 ints per block (vector x/y, sad, var, luma src, luma ref)
// and, on the finest plane (nLogScale == 0) only,
//   1 size slot + the compensated Y, U and V planes, whose byte count is
//   rounded up to whole ints.
int PlaneOfBlocks::GetArraySize()
{
   int nInts = 1 + nBlkCount * 6;

   if ( nLogScale == 0 )
   {
      const int nStep = nBlkSize - nOverlap;
      const int nLumaBytes = (nBlkX * nStep + nOverlap) * (nBlkY * nStep + nOverlap);
      const int nChromaBytes = nLumaBytes / (2 * yRatioUV);

      nInts += 1;                                                 // compensation size slot
      nInts += (nLumaBytes + nChromaBytes + nChromaBytes + 3) / 4; // Y + U + V
   }

   return nInts;
}

// Collects the spatial predictors for the current block (blkx, blky, blkIdx):
//   predictors[1]  left neighbour
//   predictors[2]  upper neighbour
//   predictors[3]  upper-right neighbour
//   predictors[0]  component-wise median of the three (below the first block
//                  row) or their plain sum (on the first row, where the upper
//                  ones are zero)
// Neighbours that do not exist are replaced by zeroMV. On the smallest plane
// predictors[0] also becomes the main predictor. Finally lambda is switched
// off for this block when the predictor's SAD exceeds LSAD.
void PlaneOfBlocks::FetchPredictors()
{
	const bool hasLeft = ( blkx > 0 );
	const bool hasAbove = ( blky > 0 );
	const bool hasAboveRight = hasAbove && ( blkx < nBlkX - 1 );

	if ( hasLeft )
		predictors[1] = ClipMV(vectors[blkIdx - 1]);
	else
		predictors[1] = zeroMV;

	if ( hasAbove )
		predictors[2] = ClipMV(vectors[blkIdx - nBlkX]);
	else
		predictors[2] = zeroMV;

	if ( hasAboveRight )
		predictors[3] = ClipMV(vectors[blkIdx - nBlkX + 1]);
	else
		predictors[3] = zeroMV;

	if ( hasAbove ) // replaced 1 by 0 - Fizick
	{
		// Note: each component is taken independently, so the result is not
		// necessarily one of the three vectors, nor is the sad "its" sad.
		predictors[0].x = Median(predictors[1].x, predictors[2].x, predictors[3].x);
		predictors[0].y = Median(predictors[1].y, predictors[2].y, predictors[3].y);
		predictors[0].sad = Median(predictors[1].sad, predictors[2].sad, predictors[3].sad);
	}
	else
	{
		predictors[0].x = predictors[1].x + predictors[2].x + predictors[3].x;
		predictors[0].y = predictors[1].y + predictors[2].y + predictors[3].y;
		predictors[0].sad = predictors[1].sad + predictors[2].sad + predictors[3].sad;
	}

	// if there are no other planes, predictor is the median
	if ( smallestPlane )
		predictor = predictors[0];

	// generalized (was LSAD=400) by Fizick
	if ( predictor.sad > LSAD )
		nLambda = 0;
}

// Motion search for the current block.
//
// Candidates are evaluated in this order, keeping the cheapest in bestMV /
// nMinCost:
//   1. the zero vector (plain SAD, no extra cost),
//   2. the global motion predictor, if non-zero and valid,
//   3. the main predictor, if non-zero, different from the global one and valid,
//   4. the four spatial predictors, through CheckMV.
// Only when the best cost is still above penaltyNew are the refinement
// searches selected by searchType run. The result is stored in
// vectors[blkIdx].
void PlaneOfBlocks::PseudoEPZSearch()
{
	FetchPredictors();

	// 1. Zero vector, treated alone.
	// Do we bias zero with not taking into account distorsion ?
	bestMV = zeroMV;
	bestMV.sad = SAD(pSrc[0], GetRefBlock(0, 0), nSrcPitch[0], nRefPitch[0])
		+ SADCHROMA(pSrc[1], GetRefBlockU(0, 0), nSrcPitch[1], nRefPitch[1])
		+ SADCHROMA(pSrc[2], GetRefBlockV(0, 0), nSrcPitch[2], nRefPitch[2]);
	nMinCost = bestMV.sad; // no coherence penalty, for static stability - Fizick

	// 2. Global MV predictor - added by Fizick
	const bool globalIsZero = ( globalMVPredictor.x == 0 ) && ( globalMVPredictor.y == 0 );
	if ( !globalIsZero && IsVectorOK(globalMVPredictor.x, globalMVPredictor.y) )
	{
		const int globalSad = SAD(pSrc[0], GetRefBlock(globalMVPredictor.x, globalMVPredictor.y), nSrcPitch[0], nRefPitch[0])
			+ SADCHROMA(pSrc[1], GetRefBlockU(globalMVPredictor.x, globalMVPredictor.y), nSrcPitch[1], nRefPitch[1])
			+ SADCHROMA(pSrc[2], GetRefBlockV(globalMVPredictor.x, globalMVPredictor.y), nSrcPitch[2], nRefPitch[2]);
		if ( globalSad < nMinCost )
		{
			bestMV.x = globalMVPredictor.x;
			bestMV.y = globalMVPredictor.y;
			bestMV.sad = globalSad;
			nMinCost = bestMV.sad;
		}
	}

	// 3. Main predictor, unless it repeats a candidate already tested.
	const bool predIsZero = ( predictor.x == 0 ) && ( predictor.y == 0 );
	const bool predIsGlobal = ( predictor.x == globalMVPredictor.x ) && ( predictor.y == globalMVPredictor.y );
	if ( !predIsZero && !predIsGlobal && IsVectorOK(predictor.x, predictor.y) )
	{
		const int predSad = SAD(pSrc[0], GetRefBlock(predictor.x, predictor.y), nSrcPitch[0], nRefPitch[0])
			+ SADCHROMA(pSrc[1], GetRefBlockU(predictor.x, predictor.y), nSrcPitch[1], nRefPitch[1])
			+ SADCHROMA(pSrc[2], GetRefBlockV(predictor.x, predictor.y), nSrcPitch[2], nRefPitch[2]);
		const int predCost = predSad;
		if ( predCost < nMinCost )
		{
			bestMV.x = predictor.x;
			bestMV.y = predictor.y;
			bestMV.sad = predSad;
			nMinCost = predCost;
		}
	}

	// 4. All the other predictors.
	for ( int p = 0; p < 4; p++ )
	{
		CheckMV(predictors[p].x, predictors[p].y);
	}

	// A good enough predictor makes the refinement unnecessary - Fizick
	if ( nMinCost <= penaltyNew )
	{
		vectors[blkIdx] = bestMV;
		return;
	}

	// Refinement, according to the search type.
	if ( searchType & EXHAUSTIVE )
	{
		ExhaustiveSearch(nSearchParam);
	}

	if ( searchType & LOGARITHMIC )
	{
		int diamondLen = nSearchParam;
		while ( diamondLen > 0 )
		{
			DiamondSearch(diamondLen);
			diamondLen /= 2;
		}
	}

	if ( searchType & ONETIME )
	{
		int lineLen = nSearchParam;
		while ( lineLen > 0 )
		{
			OneTimeSearch(lineLen);
			lineLen /= 2;
		}
	}

	if ( searchType & NSTEP )
	{
		NStepSearch(nSearchParam);
	}

	if ( searchType & SQUARE )
	{
		SquareSearch();
	}

	// we store the result
	vectors[blkIdx] = bestMV;
}

// Diamond search around bestMV with step 'length'.
//
// Each round first tries the axis directions hinted by the previous round.
// If one of them improves the cost, the perpendicular axis is tried from the
// new best position. If none does, diagonals chosen from the last hint are
// tried instead. The search ends after a round in which no candidate is
// accepted by CheckMV2.
void PlaneOfBlocks::DiamondSearch(int length)
{
	// Direction flags handed to CheckMV2; they combine by addition, so
	// RIGHT + DOWN means "down right", and so on.
	enum
	{
		RIGHT = 1,
		LEFT = 2,
		DOWN = 4,
		UP = 8,
		DOWN_RIGHT = RIGHT + DOWN,
		DOWN_LEFT = LEFT + DOWN,
		UP_RIGHT = RIGHT + UP,
		UP_LEFT = LEFT + UP
	};

	// No assumption at first: every axis direction gets tested.
	int found = RIGHT + LEFT + DOWN + UP;

	while ( found > 0 )
	{
		int cx = bestMV.x;
		int cy = bestMV.y;
		int hint = found;
		found = 0;

		// Axis directions suggested by the previous round.
		if ( hint & RIGHT ) CheckMV2(cx + length, cy, &found, RIGHT);
		if ( hint & LEFT ) CheckMV2(cx - length, cy, &found, LEFT);
		if ( hint & DOWN ) CheckMV2(cx, cy + length, &found, DOWN);
		if ( hint & UP ) CheckMV2(cx, cy - length, &found, UP);

		if ( found )
		{
			// An axis move paid off: probe the perpendicular axis from the
			// new best position.
			hint = found;
			cx = bestMV.x;
			cy = bestMV.y;

			if ( hint & (RIGHT + LEFT) )
			{
				CheckMV2(cx, cy + length, &found, DOWN);
				CheckMV2(cx, cy - length, &found, UP);
			}
			else
			{
				CheckMV2(cx + length, cy, &found, RIGHT);
				CheckMV2(cx - length, cy, &found, LEFT);
			}
			continue;
		}

		// Nothing improved: do not stop yet, try the diagonals inferred from
		// the last direction, because we might be lucky.
		const int xRight = cx + length;
		const int xLeft = cx - length;
		const int yDown = cy + length;
		const int yUp = cy - length;

		switch ( hint )
		{
			case RIGHT :
				CheckMV2(xRight, yDown, &found, DOWN_RIGHT);
				CheckMV2(xRight, yUp, &found, UP_RIGHT);
				break;
			case LEFT :
				CheckMV2(xLeft, yDown, &found, DOWN_LEFT);
				CheckMV2(xLeft, yUp, &found, UP_LEFT);
				break;
			case DOWN :
				CheckMV2(xRight, yDown, &found, DOWN_RIGHT);
				CheckMV2(xLeft, yDown, &found, DOWN_LEFT);
				break;
			case UP :
				CheckMV2(xRight, yUp, &found, UP_RIGHT);
				CheckMV2(xLeft, yUp, &found, UP_LEFT);
				break;
			case DOWN_RIGHT :
				CheckMV2(xRight, yDown, &found, DOWN_RIGHT);
				CheckMV2(xLeft, yDown, &found, DOWN_LEFT);
				CheckMV2(xRight, yUp, &found, UP_RIGHT);
				break;
			case DOWN_LEFT :
				CheckMV2(xRight, yDown, &found, DOWN_RIGHT);
				CheckMV2(xLeft, yDown, &found, DOWN_LEFT);
				CheckMV2(xLeft, yUp, &found, UP_LEFT);
				break;
			case UP_RIGHT :
				CheckMV2(xRight, yDown, &found, DOWN_RIGHT);
				CheckMV2(xLeft, yUp, &found, UP_LEFT);
				CheckMV2(xRight, yUp, &found, UP_RIGHT);
				break;
			case UP_LEFT :
				CheckMV2(xLeft, yUp, &found, UP_LEFT);
				CheckMV2(xLeft, yDown, &found, DOWN_LEFT);
				CheckMV2(xRight, yUp, &found, UP_RIGHT);
				break;
			default :
				// Reached e.g. in the first round, when all four axis
				// directions were hinted: try every diagonal.
				CheckMV2(xRight, yDown, &found, DOWN_RIGHT);
				CheckMV2(xLeft, yDown, &found, DOWN_LEFT);
				CheckMV2(xRight, yUp, &found, UP_RIGHT);
				CheckMV2(xLeft, yUp, &found, UP_LEFT);
				break;
		}
	}
}
	
// Tests the 8 immediate neighbours of bestMV (a 3x3 square).
//
// ExhaustiveSearch(s) covers offsets -(s-1) .. +(s-1) around the centre and
// skips the centre itself, so a parameter of 1 visits no candidate at all;
// 2 is the smallest value that yields the 3x3 neighbourhood.
void PlaneOfBlocks::SquareSearch()
{
	ExhaustiveSearch(2);
}

void PlaneOfBlocks::ExhaustiveSearch(int s)
{
	int i, j;
	VECTOR mv = bestMV;

	for ( i = -s + 1; i < 0; i++ )
		for ( j = -s + 1; j < s; j++ )
			CheckMV(mv.x + i, mv.y + j);

	for ( i = 1; i < s; i++ )
		for ( j = -s + 1; j < s; j++ )
			CheckMV(mv.x + i, mv.y + j);

	for ( j = -s + 1; j < 0; j++ )
		CheckMV(mv.x, mv.y + j);

	for ( j = 1; j < s; j++ )
		CheckMV(mv.x, mv.y + j);

}

// N-step search: for each step length from 'stp' down to 1, tests the eight
// vectors at that distance (axis and diagonal directions) around the current
// bestMV. The centre is re-read at the start of every step, so it follows any
// improvement found at the previous, larger step.
void PlaneOfBlocks::NStepSearch(int stp)
{
	for ( int length = stp; length > 0; length-- )
	{
		const int dx = bestMV.x;
		const int dy = bestMV.y;

		// The vertical coordinate is derived from dy (it used to be built
		// from dx, which sent the candidates to unrelated positions).
		CheckMV(dx + length, dy + length);
		CheckMV(dx + length, dy);
		CheckMV(dx + length, dy - length);
		CheckMV(dx, dy - length);
		CheckMV(dx, dy + length);
		CheckMV(dx - length, dy + length);
		CheckMV(dx - length, dy);
		CheckMV(dx - length, dy - length);
	}
}

// One-at-a-time search with step 'length': first along the horizontal axis,
// then along the vertical one. On each axis both neighbours of the start
// position are tested; if one is accepted by CheckMV2, the search keeps
// walking in that direction for as long as each further step is accepted.
void PlaneOfBlocks::OneTimeSearch(int length)
{
	int accepted = 0;
	int px = bestMV.x;
	int py = bestMV.y;

	// --- horizontal axis: 2 = left accepted, 1 = right accepted ---
	CheckMV2(px - length, py, &accepted, 2);
	CheckMV2(px + length, py, &accepted, 1);

	if ( accepted == 1 || accepted == 2 )
	{
		const int stepX = ( accepted == 1 ) ? length : -length;
		do
		{
			accepted = 0;
			px += stepX;
			CheckMV2(px + stepX, py, &accepted, 1);
		}
		while ( accepted );
	}

	// --- vertical axis: 2 = up accepted, 1 = down accepted ---
	CheckMV2(px, py - length, &accepted, 2);
	CheckMV2(px, py + length, &accepted, 1);

	if ( accepted == 1 || accepted == 2 )
	{
		const int stepY = ( accepted == 1 ) ? length : -length;
		do
		{
			accepted = 0;
			py += stepY;
			CheckMV2(px, py + stepY, &accepted, 1);
		}
		while ( accepted );
	}
}
//------------------------------------------------------
//  get i-th element of array s (by value) by partial sorting  
//int findi(int *s, int n, int i)
//{
//	int c,j,k;
//    for (k=0; k<=i; k++) 
//	{ 
//		for (j=k+1; j<=n; j++)
//		{
//		    if (s[k] < s[j]) 
//			{
//			  c=s[k];
//			  s[k]=s[j];
//			  s[j]=c;
//			}
//		}
//	}
//	return s[i]; 
//} 
//
// Returns the k-th smallest value (k is 0-based) of the n ints in marr;
// k = n/2 gives the median used for the global motion estimate.
//
//   marr  input values; reordered in place (it stays a permutation of the
//         input, with marr[k] holding the result)
//   n     number of values in marr
//   k     0-based rank wanted; clamped to [0, n-1]
//   larr  scratch buffer required by the former hand-written selection;
//         no longer used, kept so that existing callers keep compiling
//
// Returns 0 when marr is null or n <= 0.
//
// Selection is delegated to std::nth_element (average linear time). The
// previous median-of-medians code compared an element with itself while
// sorting its groups of five and could drop the last element of the range
// without ever comparing it, which produced wrong ranks on some inputs.
int KthElement(int * marr, int n, int k, int * larr)
{
    (void)larr; // unused, see above

    if( !marr || n <= 0 )
    {
        return 0;
    }

    if( k < 0 )
    {
        k = 0;
    }
    else if( k >= n )
    {
        k = n - 1;
    }

    std::nth_element(marr, marr + k, marr + n);
    return marr[k];
}
//----------------------------------------------------------------

// Estimates the global motion of this plane as the component-wise median of
// its block vectors and stores it, doubled, in *globalMVec so that it can be
// used as predictor on the next (twice finer) plane. - added by Fizick
//
// The value *globalMVec holds on entry is not read here; it is overwritten.
// Planes with 100 blocks or more are subsampled: only every second vector
// takes part in the median.
//
// Very simple but robust method; something more advanced (like MVDepan)
// could be implemented later.
void PlaneOfBlocks::EstimateGlobalMVDoubled(VECTOR *globalMVec)
{
	const int stride = ( nBlkCount < 100 ) ? 1 : 2; // skip half for large arrays
	const int nSamples = nBlkCount / stride;

	// One spare slot per buffer, as in the original allocation.
	int *xs = static_cast<int *>(malloc((nSamples + 1) * sizeof(int)));
	int *ys = static_cast<int *>(malloc((nSamples + 1) * sizeof(int)));
	int *scratch = static_cast<int *>(malloc((nSamples + 1) * sizeof(int))); // v1.0.1

	for ( int s = 0; s < nSamples; s++ )
	{
		const int src = s * stride;
		xs[s] = vectors[src].x;
		ys[s] = vectors[src].y;
	}

	// find median element values
	const int middle = nSamples / 2;
	const int medianx = KthElement(xs, nSamples, middle, scratch);
	const int mediany = KthElement(ys, nSamples, middle, scratch);

	// doubled for next scale level
	globalMVec->x = 2 * medianx;
	globalMVec->y = 2 * mediany;

	free(xs);
	free(ys);
	free(scratch);
}
