// Pixels flow motion function
// Copyright(c)2005 A.G.Balakhnin aka Fizick

// See legal notice in Copying.txt for more information

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; version 2 of the License.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
// http://www.gnu.org/copyleft/gpl.html .

#include "MVFlow.h"
#include "CopyCode.h"
#include "MaskFun.h"


// Constructs the motion-flow compensation filter and allocates all
// per-instance work buffers.
//   _child       - clip whose frames are compensated
//   _mvec        - motion vectors clip (given to both MVFilter and mvClip)
//   _time256     - vector scale: each vector v is applied as v*_time256/256
//   _mode        - 0 = fetch, 1 = shift (see GetFrame)
//   _nIdx        - index used to register/obtain frames in mvCore
//   nSCD1, nSCD2 - forwarded to mvClip
//   _mmx, _isse  - CPU optimisation switches stored for later use
//   env          - script environment (CPU flags are queried from it)
MVFlow::MVFlow(PClip _child, PClip _mvec, int _time256, int _mode, int _nIdx,
                           int nSCD1, int nSCD2, bool _mmx, bool _isse, IScriptEnvironment* env) :
GenericVideoFilter(_child),
MVFilter(_mvec, "MVFlow", env),
mvClip(_mvec, nSCD1, nSCD2, env)
{
	time256 = _time256;
	mode = _mode;
	nIdx = _nIdx;
	mmx = _mmx;
	isse = _isse;

	mvCore->AddFrames(nIdx, MV_BUFFER_FRAMES, mvClip.GetLevelCount(), nWidth, nHeight,
	                  nPel, nHPadding, nVPadding, YUVPLANES, isse, yRatioUV);

	// The block grid may not reach the right/bottom frame edge; add one more
	// (padded) block column/row so that the vector field covers the full frame.
	nBlkXP = (nBlkX*(nBlkSize - nOverlap) + nOverlap < nWidth) ? nBlkX+1 : nBlkX;
	nBlkYP = (nBlkY*(nBlkSize - nOverlap) + nOverlap < nHeight) ? nBlkY+1 : nBlkY;
	nWidthP = nBlkXP*(nBlkSize - nOverlap) + nOverlap;
	nHeightP = nBlkYP*(nBlkSize - nOverlap) + nOverlap;
	// for YV12
	nWidthPUV = nWidthP/2;
	nHeightPUV = nHeightP/yRatioUV;

	nHeightUV = nHeight/yRatioUV;
	nWidthUV = nWidth/2;

	int nHPaddingUV = nHPadding/2;
	int nVPaddingUV = nVPadding/yRatioUV;

	// pitches of the full-size vector planes, rounded up to a multiple of 16
	VPitchY = (nWidthP + 15) & (~15);
	VPitchUV = (nWidthPUV + 15) & (~15);

	// full-frame vector component planes (one byte per pixel)
	VXFullY = new BYTE [nHeightP*VPitchY];
	VXFullUV = new BYTE [nHeightPUV*VPitchUV];

	VYFullY = new BYTE [nHeightP*VPitchY];
	VYFullUV = new BYTE [nHeightPUV*VPitchUV];

	// per-block vector component masks (one byte per block)
	VXSmallY = new BYTE [nBlkXP*nBlkYP];
	VYSmallY = new BYTE [nBlkXP*nBlkYP];
	VXSmallUV = new BYTE [nBlkXP*nBlkYP];
	VYSmallUV = new BYTE [nBlkXP*nBlkYP];

	// geometry of the nPel-times enlarged (padded) reference planes
	int pel2WidthY = (nWidth + 2*nHPadding)*nPel; // and pitch
	pel2HeightY = (nHeight + 2*nVPadding)*nPel;
	int pel2WidthUV = (nWidthUV + 2*nHPaddingUV)*nPel;
	pel2HeightUV = (nHeightUV + 2*nVPaddingUV)*nPel;
	pel2PitchY = (pel2WidthY + 15) & (~15);
	pel2PitchUV = (pel2WidthUV + 15) & (~15);
	// offset of the first non-padding pixel inside the enlarged planes
	pel2OffsetY = pel2PitchY * nVPadding*nPel + nHPadding*nPel;
	pel2OffsetUV = pel2PitchUV * nVPaddingUV*nPel + nHPaddingUV*nPel;
	if (nPel>1)
	{
		pel2PlaneY = new BYTE [pel2PitchY*pel2HeightY];
		pel2PlaneU = new BYTE [pel2PitchUV*pel2HeightUV];
		pel2PlaneV = new BYTE [pel2PitchUV*pel2HeightUV];
	}
	else
	{
		// not used for nPel==1; keep the pointers in a defined state
		pel2PlaneY = 0;
		pel2PlaneU = 0;
		pel2PlaneV = 0;
	}

	int CPUF_Resize = env->GetCPUFlags();
	// When isse is off, clear only the SSE-related bits. Bitwise NOT (~) is
	// required here: logical NOT (!) of a non-zero flag is 0 and would wipe
	// every flag instead of the two that are meant to be masked out.
	if (!isse) CPUF_Resize = (CPUF_Resize & ~CPUF_INTEGER_SSE) & ~CPUF_SSE2;

	// resizers from the block-sized masks up to the padded frame size
	upsizer = new SimpleResize(nWidthP, nHeightP, nBlkXP, nBlkYP, CPUF_Resize);
	upsizerUV = new SimpleResize(nWidthPUV, nHeightPUV, nBlkXP, nBlkYP, CPUF_Resize);

	// lookup table: stored vector byte -> time-scaled displacement
	LUTV = new int[256];
	Create_LUTV(time256, LUTV);

	if ( (pixelType & VideoInfo::CS_YUY2) == VideoInfo::CS_YUY2 )
	{
		// planar scratch buffers used to process YUY2 frames plane by plane
		RefPlanes =  new YUY2Planes(nWidth, nHeight);
		DstPlanes =  new YUY2Planes(nWidth, nHeight);
	}
	else
	{
		RefPlanes = 0;
		DstPlanes = 0;
	}

}

// Releases everything allocated by the constructor.
// Single objects are freed with delete; every buffer that was created with
// new[] is freed with delete[] (using plain delete on an array is undefined
// behaviour).
MVFlow::~MVFlow()
{
	// YUY2 scratch planes exist only for YUY2 input
	if ( (pixelType & VideoInfo::CS_YUY2) == VideoInfo::CS_YUY2 )
	{
		delete RefPlanes;
		delete DstPlanes;
	}

	delete upsizer;
	delete upsizerUV;

	// vector component planes and masks (arrays of BYTE)
	delete [] VXFullY;
	delete [] VXFullUV;
	delete [] VYFullY;
	delete [] VYFullUV;
	delete [] VXSmallY;
	delete [] VYSmallY;
	delete [] VXSmallUV;
	delete [] VYSmallUV;

	// enlarged reference planes are allocated only when nPel>1
	if (nPel>1)
	{
		delete [] pel2PlaneY;
		delete [] pel2PlaneU;
		delete [] pel2PlaneV;
	}

	// lookup table (array of int)
	delete [] LUTV;
}

// Fills the 256-entry table LUTV with the time-scaled displacement of every
// possible stored vector byte. Stored bytes carry a bias of 128, so entry v
// receives ((v-128)*time256)/256 using truncating integer division.
void MVFlow::Create_LUTV(int time256, int *LUTV)
{
	int *entry = LUTV;
	for (int centered = -128; centered < 128; ++centered)
	{
		*entry = (centered * time256) / 256;
		++entry;
	}
}



// Shift mode: every reference pixel is written to the destination at the
// position it moves to along its (negated, time-scaled) vector.
//   pdst, dst_pitch    - destination plane and its pitch
//   pref, ref_pitch    - reference plane and its pitch
//   VXFull, VXPitch    - per-pixel horizontal vector component (biased by 128)
//   VYFull, VYPitch    - per-pixel vertical vector component (biased by 128)
//   width, height      - plane size in pixels
//   time256            - time scale in 1/256 units
// Pixels whose target falls outside the plane are skipped. Only nPel 1 and 2
// are handled; for any other nPel the destination is left untouched.
void MVFlow::Shift(BYTE * pdst, int dst_pitch, const BYTE *pref, int ref_pitch,  BYTE *VXFull, int VXPitch,  BYTE *VYFull, int VYPitch, int width, int height, int time256)
{
	if (nPel != 1 && nPel != 2)
		return;

	// Half-pel vectors are simply reduced to whole pixels by the doubled
	// divisor (no image interpolation is done in this mode).
	const int divisor = (nPel == 1) ? 256 : 512;

	for (int row = 0; row < height; ++row)
	{
		for (int col = 0; col < width; ++col)
		{
			const int dx = -((VXFull[col] - 128) * time256) / divisor;
			const int dy = -((VYFull[col] - 128) * time256) / divisor;
			const int targetRow = row + dy;
			const int targetCol = col + dx;

			// bound check, the destination is not padded
			const bool rowInside = (targetRow >= 0) && (targetRow < height);
			const bool colInside = (targetCol >= 0) && (targetCol < width);
			if (rowInside && colInside)
			{
				pdst[dy * dst_pitch + dx + col] = pref[col];
			}
		}

		// advance all four planes to the next line
		VYFull += VYPitch;
		VXFull += VXPitch;
		pdst += dst_pitch;
		pref += ref_pitch;
	}
}

// Fetch mode: every destination pixel is read from the reference plane at the
// position displaced by its time-scaled vector.
//   pdst, dst_pitch    - destination plane and its pitch
//   pref, ref_pitch    - reference plane and its pitch; for nPel==2 this is
//                        the plane enlarged by 2 in both directions
//   VXFull, VXPitch    - per-pixel horizontal vector component (biased by 128)
//   VYFull, VYPitch    - per-pixel vertical vector component (biased by 128)
//   width, height      - destination plane size in pixels
//   time256            - not read here: the scaled displacement is taken from
//                        the LUTV table (built from the member time256)
// No bound check is done on the reference access.
// NOTE(review): this presumably relies on the reference plane being padded
// and on the vectors being short - confirm against the callers.
// Only nPel 1 and 2 are handled; for any other nPel nothing is written.
void MVFlow::Fetch(BYTE * pdst, int dst_pitch, const BYTE *pref, int ref_pitch,  BYTE *VXFull, int VXPitch,  BYTE *VYFull, int VYPitch, int width, int height, int time256)
{
	if (nPel != 1 && nPel != 2)
		return;

	// In the enlarged reference one destination pixel spans nPel samples
	// horizontally and nPel lines vertically.
	const int step = nPel;
	const int refLineAdvance = ref_pitch * step;

	for (int row = 0; row < height; ++row)
	{
		for (int col = 0; col < width; ++col)
		{
			// table lookup gives ((v-128)*time256)/256 with correct rounding
			// toward zero for negative values
			const int dx = LUTV[VXFull[col]];
			const int dy = LUTV[VYFull[col]];

			pdst[col] = pref[dy * ref_pitch + dx + col * step];
		}

		// advance all four planes to the next destination line
		VYFull += VYPitch;
		VXFull += VXPitch;
		pdst += dst_pitch;
		pref += refLineAdvance;
	}
}


//-------------------------------------------------------------------------
// Produces output frame n.
// The reference frame (n + delta for backward vectors, n - delta otherwise)
// is motion-compensated towards frame n using a per-pixel vector field that is
// interpolated from the block vectors. mode 0 uses Fetch, mode 1 uses Shift.
// When the vectors for frame n are not usable, the unmodified source frame n
// is returned instead.
PVideoFrame __stdcall MVFlow::GetFrame(int n, IScriptEnvironment* env)
{
	PVideoFrame	src	= child->GetFrame(n, env);
   PVideoFrame dst, ref;
   BYTE *pDst[3];
	const BYTE *pRef[3];
    int nDstPitches[3], nRefPitches[3];
	int nref;
	unsigned char *pDstYUY2;
	int nDstPitchYUY2;

   int off = mvClip.GetDeltaFrame(); // integer offset of reference frame

   // choose the reference frame number according to the vectors direction
   if ( mvClip.IsBackward() )
   {
	   nref = n + off;
   }
   else
   {
		nref = n - off;
   }
   mvClip.Update(n, env);// backward from next to current


   if ( mvClip.IsUsable())
   {
		ref = child->GetFrame(nref, env);//  ref for  compensation
		dst = env->NewVideoFrame(vi);
		MVFrames *pFrames = mvCore->GetFrames(nIdx);
        MVGroupOfFrames *pRefGOF = pFrames->GetFrame(nref); // ref


		if ( (pixelType & VideoInfo::CS_YUY2) == VideoInfo::CS_YUY2 )
		{
			// YUY2 input: work on the planar scratch buffers RefPlanes/DstPlanes;
			// the result is packed back into dst at the end of this function
			const unsigned char *pRefYUY2 = ref->GetReadPtr();
			int nRefPitchYUY2 = ref->GetPitch();
			pRef[0] = RefPlanes->GetPtr();
			pRef[1] = RefPlanes->GetPtrU();
			pRef[2] = RefPlanes->GetPtrV();
			nRefPitches[0]  = RefPlanes->GetPitch();
			nRefPitches[1]  = RefPlanes->GetPitchUV();
			nRefPitches[2]  = RefPlanes->GetPitchUV();
			YUY2ToPlanes(pRefYUY2, nRefPitchYUY2, nWidth, nHeight,
				pRef[0], nRefPitches[0], pRef[1], pRef[2], nRefPitches[1], isse);

			pDstYUY2 = dst->GetWritePtr();
			nDstPitchYUY2 = dst->GetPitch();
			pDst[0] = DstPlanes->GetPtr();
			pDst[1] = DstPlanes->GetPtrU();
			pDst[2] = DstPlanes->GetPtrV();
			nDstPitches[0]  = DstPlanes->GetPitch();
			nDstPitches[1]  = DstPlanes->GetPitchUV();
			nDstPitches[2]  = DstPlanes->GetPitchUV();
			// NOTE(review): dst is a freshly allocated frame at this point, so this
			// conversion reads content that has not been written yet - confirm
			// whether it is needed
			YUY2ToPlanes(pDstYUY2, nDstPitchYUY2, nWidth, nHeight,
				pDst[0], nDstPitches[0], pDst[1], pDst[2], nDstPitches[1], isse);
		}
		else
		{
		 // planar input: use the frame planes directly

		 pDst[0] = YWPLAN(dst);
         pDst[1] = UWPLAN(dst);
         pDst[2] = VWPLAN(dst);
         nDstPitches[0] = YPITCH(dst);
         nDstPitches[1] = UPITCH(dst);
         nDstPitches[2] = VPITCH(dst);

         pRef[0] = YRPLAN(ref);
         pRef[1] = URPLAN(ref);
         pRef[2] = VRPLAN(ref);
         nRefPitches[0] = YPITCH(ref);
         nRefPitches[1] = UPITCH(ref);
         nRefPitches[2] = VPITCH(ref);

		}

         // hand the reference planes to the frame group, then pad and refine them
         pRefGOF->SetPlane(pRef[0], nRefPitches[0], YPLANE);
         pRefGOF->SetPlane(pRef[1], nRefPitches[1], UPLANE);
         pRefGOF->SetPlane(pRef[2], nRefPitches[2], VPLANE);
         pRefGOF->Pad(YUVPLANES);
         pRefGOF->Refine(YUVPLANES);


         MVPlane *pPlanes[3];

         // planes of frame 0 of the group (used below as the fetch/shift source)
         pPlanes[0] = pRefGOF->GetFrame(0)->GetPlane(YPLANE);
         pPlanes[1] = pRefGOF->GetFrame(0)->GetPlane(UPLANE);
         pPlanes[2] = pRefGOF->GetFrame(0)->GetPlane(VPLANE);

		 if (nPel>1 )
		 {
			 // merge refined planes to big single plane
			 // (the four sub-planes (0,0),(1,0),(0,1),(1,1) go into one enlarged plane
			 // per colour component; these are used by Fetch for nPel==2)
			 Merge4PlanesToBig(pel2PlaneY, pel2PitchY, pPlanes[0]->GetAbsolutePointer(0,0),
				 pPlanes[0]->GetAbsolutePointer(1,0), pPlanes[0]->GetAbsolutePointer(0,1),
				 pPlanes[0]->GetAbsolutePointer(1,1), pPlanes[0]->GetExtendedWidth(),
				 pPlanes[0]->GetExtendedHeight(), pPlanes[0]->GetPitch(), isse);
			 Merge4PlanesToBig(pel2PlaneU, pel2PitchUV, pPlanes[1]->GetAbsolutePointer(0,0),
				 pPlanes[1]->GetAbsolutePointer(1,0), pPlanes[1]->GetAbsolutePointer(0,1),
				 pPlanes[1]->GetAbsolutePointer(1,1), pPlanes[1]->GetExtendedWidth(),
				 pPlanes[1]->GetExtendedHeight(), pPlanes[1]->GetPitch(), isse);
			 Merge4PlanesToBig(pel2PlaneV, pel2PitchUV, pPlanes[2]->GetAbsolutePointer(0,0),
				 pPlanes[2]->GetAbsolutePointer(1,0), pPlanes[2]->GetAbsolutePointer(0,1),
				 pPlanes[2]->GetAbsolutePointer(1,1), pPlanes[2]->GetExtendedWidth(),
				 pPlanes[2]->GetExtendedHeight(), pPlanes[2]->GetPitch(), isse);

		 }


// Disabled debug code: compares several ways of scaling a biased vector byte.
//		 char debugbuf[100];
//		 for (int V=100; V<145; V++)
//		 {
//			 int vx1 = ((V-128)*time256)>>8;
//				int vx2 = ((V*time256+127)>>8) - (time256>>1);
//				int vx3 = ((V*time256)/256) - (time256/2);
//				int vx4 = ((V-128)*time256)/256;
//		 		int vx5 = V-128;
//				if (vx5 < 0) vx5++;
//				vx5 = (vx5*time256)>>8;
//
//			 sprintf(debugbuf,"MVFlow: V=%d %d %d %d %d %d", V,vx1,vx2,vx3,vx4,vx5);
//			 OutputDebugString(debugbuf);
//		 }
//
	  // make  vector vx and vy small masks
	 // 1. ATTENTION: vectors are assumed SHORT (|vx|, |vy| < 127) !
	 // 2. they will be zeroed if not
	// 3. added 128 to all values
	MakeVectorSmallMasks(mvClip, nBlkX, nBlkY, VXSmallY, nBlkXP, VYSmallY, nBlkXP);
	if (nBlkXP > nBlkX) // fill right
	{
		// the extra padded column copies its left neighbour; vx is limited to
		// at most 128 (the bias, i.e. a zero vector), so it is never positive
		for (int j=0; j<nBlkY; j++)
		{
			VXSmallY[j*nBlkXP + nBlkX] = min(VXSmallY[j*nBlkXP + nBlkX-1],128);
			VYSmallY[j*nBlkXP + nBlkX] = VYSmallY[j*nBlkXP + nBlkX-1];
		}
	}
	if (nBlkYP > nBlkY) // fill bottom
	{
		// the extra padded row copies the row above (all nBlkXP columns, so the
		// corner is covered too); vy is limited to at most 128 here
		for (int i=0; i<nBlkXP; i++)
		{
			VXSmallY[nBlkXP*nBlkY +i] = VXSmallY[nBlkXP*(nBlkY-1) +i];
			VYSmallY[nBlkXP*nBlkY +i] = min(VYSmallY[nBlkXP*(nBlkY-1) +i],128);
		}
	}

	// derive the chroma masks from the luma masks (ratio 2 horizontally,
	// yRatioUV vertically)
	VectorSmallMaskYToHalfUV(VXSmallY, nBlkXP, nBlkYP, VXSmallUV, 2);
	VectorSmallMaskYToHalfUV(VYSmallY, nBlkXP, nBlkYP, VYSmallUV, yRatioUV);

	  // upsize (bilinear interpolate) vector masks to fullframe size

	  int dummyplane = PLANAR_Y; // use luma plane resizer code for all planes if we resize from luma small mask
	  upsizer->SimpleResizeDo(VXFullY, nWidthP, nHeightP, VPitchY, VXSmallY, nBlkXP, nBlkXP, dummyplane);
	  upsizer->SimpleResizeDo(VYFullY, nWidthP, nHeightP, VPitchY, VYSmallY, nBlkXP, nBlkXP, dummyplane);
	  upsizerUV->SimpleResizeDo(VXFullUV, nWidthPUV, nHeightPUV, VPitchUV, VXSmallUV, nBlkXP, nBlkXP, dummyplane);
	  upsizerUV->SimpleResizeDo(VYFullUV, nWidthPUV, nHeightPUV, VPitchUV, VYSmallUV, nBlkXP, nBlkXP, dummyplane);


	  if (mode==0) // fetch mode
		{
		  if (nPel==2)
		  {
			// read from the enlarged planes, starting past their padding
			Fetch(pDst[0], nDstPitches[0], pel2PlaneY + pel2OffsetY, pel2PitchY, VXFullY, VPitchY, VYFullY, VPitchY, nWidth, nHeight, time256); //padded
			Fetch(pDst[1], nDstPitches[1], pel2PlaneU + pel2OffsetUV, pel2PitchUV, VXFullUV, VPitchUV, VYFullUV, VPitchUV, nWidthUV, nHeightUV, time256);
			Fetch(pDst[2], nDstPitches[2], pel2PlaneV + pel2OffsetUV, pel2PitchUV, VXFullUV, VPitchUV, VYFullUV, VPitchUV, nWidthUV, nHeightUV, time256);
		  }
		  else //(nPel==1)
		  {
			Fetch(pDst[0], nDstPitches[0], pPlanes[0]->GetPointer(0,0), pPlanes[0]->GetPitch(), VXFullY, VPitchY, VYFullY, VPitchY, nWidth, nHeight, time256); //padded
			Fetch(pDst[1], nDstPitches[1], pPlanes[1]->GetPointer(0,0), pPlanes[1]->GetPitch(), VXFullUV, VPitchUV, VYFullUV, VPitchUV, nWidthUV, nHeightUV, time256);
			Fetch(pDst[2], nDstPitches[2], pPlanes[2]->GetPointer(0,0), pPlanes[2]->GetPitch(), VXFullUV, VPitchUV, VYFullUV, VPitchUV, nWidthUV, nHeightUV, time256);
		  }
		}
		else if (mode==1) // shift mode
		{
			// clear the destination first: Shift writes only the pixels that
			// land inside the frame
			MemZoneSet(pDst[0], 0, nWidth, nHeight, 0, 0, nDstPitches[0]);
			MemZoneSet(pDst[1], 0, nWidthUV, nHeightUV, 0, 0, nDstPitches[1]);
			MemZoneSet(pDst[2], 0, nWidthUV, nHeightUV, 0, 0, nDstPitches[2]);
			Shift(pDst[0], nDstPitches[0], pPlanes[0]->GetPointer(0,0), pPlanes[0]->GetPitch(), VXFullY, VPitchY, VYFullY, VPitchY, nWidth, nHeight, time256);
			Shift(pDst[1], nDstPitches[1], pPlanes[1]->GetPointer(0,0), pPlanes[1]->GetPitch(), VXFullUV, VPitchUV, VYFullUV, VPitchUV, nWidthUV, nHeightUV, time256);
			Shift(pDst[2], nDstPitches[2], pPlanes[2]->GetPointer(0,0), pPlanes[2]->GetPitch(), VXFullUV, VPitchUV, VYFullUV, VPitchUV, nWidthUV, nHeightUV, time256);
		}
		// NOTE(review): for any other mode value nothing is written to the
		// destination planes before dst is returned

		if ( (pixelType & VideoInfo::CS_YUY2) == VideoInfo::CS_YUY2 )
		{
			// pack the planar result back into the YUY2 output frame
			YUY2FromPlanes(pDstYUY2, nDstPitchYUY2, nWidth, nHeight,
								  pDst[0], nDstPitches[0], pDst[1], pDst[2], nDstPitches[1], isse);
		}
		return dst;
   }
   else
   {
	   // vectors are not usable for this frame: pass the source through
	   return src;
   }

}
