// Pixels flow motion blur function
// Copyright(c)2005 A.G.Balakhnin aka Fizick

// See legal notice in Copying.txt for more information

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation;  version 2 of the License.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
// http://www.gnu.org/copyleft/gpl.html .

#include "MVFlowBlur.h"
#include "CopyCode.h"
#include "MaskFun.h"


// Constructs the flow motion blur filter.
//
//   _child          source clip (handed to GenericVideoFilter)
//   _mvbw, _mvfw    backward / forward motion vector clips
//   _blur256        blur scale; vectors are multiplied by it and the product is
//                   treated as 8.8 fixed point in FlowBlur
//   _prec           sampling step along the vector; used as a divisor in FlowBlur
//   _nIdx           index passed to mvCore->AddFrames / GetFrames
//   nSCD1, nSCD2    forwarded unchanged to both vector clips
//                   (presumably scene-change thresholds - confirm in MVClip)
//   _mmx, _isse     CPU optimisation switches; _isse is forwarded to the helpers
//
// Allocates all per-frame work buffers once; the destructor releases them.
MVFlowBlur::MVFlowBlur(PClip _child, PClip _mvbw, PClip _mvfw,  int _blur256, int _prec, int _nIdx,
                           int nSCD1, int nSCD2, bool _mmx, bool _isse, IScriptEnvironment* env) :
GenericVideoFilter(_child),
MVFilter(_mvbw, "MVFlowBlur", env),
mvClipB(_mvbw, nSCD1, nSCD2, env),
mvClipF(_mvfw, nSCD1, nSCD2, env)
{
	blur256 = _blur256;
	prec = _prec;
	nIdx = _nIdx;
	mmx = _mmx;
	isse = _isse;

	// Pointers that are only allocated under a condition start out null,
	// so they never hold an indeterminate value.
	pel2PlaneYB = 0;
	pel2PlaneUB = 0;
	pel2PlaneVB = 0;
	RefPlanes = 0;
	DstPlanes = 0;

	mvCore->AddFrames(nIdx, MV_BUFFER_FRAMES, mvClipB.GetLevelCount(), nWidth, nHeight,
	                  nPel, nBlkSize, nBlkSize, YUVPLANES, isse, yRatioUV);
	mvCore->AddFrames(nIdx, MV_BUFFER_FRAMES, mvClipF.GetLevelCount(), nWidth, nHeight,
	                  nPel, nBlkSize, nBlkSize, YUVPLANES, isse, yRatioUV);

	// Chroma geometry: width is always halved, height is divided by yRatioUV.
	nHeightUV = nHeight/yRatioUV;
	nWidthUV = nWidth/2;// for YV12
	int nHPaddingUV = nHPadding/2;
	// Vertical chroma padding must come from the vertical luma padding
	// (the previous code used nHPadding here).
	int nVPaddingUV = nVPadding/yRatioUV;

	// Full-size vector fields are stored tightly packed (pitch == width).
	VPitchY = nWidth;
	VPitchUV = nWidthUV;

	// Backward vectors, full frame size.
	VXFullYB = new BYTE [nHeight*VPitchY];
	VXFullUVB = new BYTE [nHeightUV*VPitchUV];
	VYFullYB = new BYTE [nHeight*VPitchY];
	VYFullUVB = new BYTE [nHeightUV*VPitchUV];

	// Forward vectors, full frame size.
	VXFullYF = new BYTE [nHeight*VPitchY];
	VXFullUVF = new BYTE [nHeightUV*VPitchUV];
	VYFullYF = new BYTE [nHeight*VPitchY];
	VYFullUVF = new BYTE [nHeightUV*VPitchUV];

	// Backward vectors, one value per block.
	VXSmallYB = new BYTE [nBlkX*nBlkY];
	VYSmallYB = new BYTE [nBlkX*nBlkY];
	VXSmallUVB = new BYTE [nBlkX*nBlkY];
	VYSmallUVB = new BYTE [nBlkX*nBlkY];

	// Forward vectors, one value per block.
	VXSmallYF = new BYTE [nBlkX*nBlkY];
	VYSmallYF = new BYTE [nBlkX*nBlkY];
	VXSmallUVF = new BYTE [nBlkX*nBlkY];
	VYSmallUVF = new BYTE [nBlkX*nBlkY];

	// Mask buffers (block-sized and full-sized) for both directions.
	MaskSmallB = new BYTE [nBlkX*nBlkY];
	MaskFullYB = new BYTE [nHeight*VPitchY];
	MaskFullUVB = new BYTE [nHeightUV*VPitchUV];

	MaskSmallF = new BYTE [nBlkX*nBlkY];
	MaskFullYF = new BYTE [nHeight*VPitchY];
	MaskFullUVF = new BYTE [nHeightUV*VPitchUV];

	// Geometry of the merged sub-pixel planes: padded size times nPel,
	// with the pitch rounded up to a multiple of 16.
	int pel2WidthY = (nWidth + 2*nHPadding)*nPel;
	pel2HeightY = (nHeight + 2*nVPadding)*nPel;
	int pel2WidthUV = (nWidthUV + 2*nHPaddingUV)*nPel;
	pel2HeightUV = (nHeightUV + 2*nVPaddingUV)*nPel;
	pel2PitchY = (pel2WidthY + 15) & (~15);
	pel2PitchUV = (pel2WidthUV + 15) & (~15);
	// Offset of the first non-padding pixel inside a merged plane.
	pel2OffsetY = pel2PitchY * nVPadding*nPel + nHPadding*nPel;
	pel2OffsetUV = pel2PitchUV * nVPaddingUV*nPel + nHPaddingUV*nPel;
	if (nPel>1)
	{
		pel2PlaneYB = new BYTE [pel2PitchY*pel2HeightY];
		pel2PlaneUB = new BYTE [pel2PitchUV*pel2HeightUV];
		pel2PlaneVB = new BYTE [pel2PitchUV*pel2HeightUV];
	}

	// When isse is off, strip only the SSE capability bits before handing the
	// flags to the resizers. Bitwise complement is required: logical '!' on a
	// non-zero flag yields 0 and would wipe every flag.
	int CPUF_Resize = env->GetCPUFlags();
	if (!isse) CPUF_Resize &= ~(CPUF_INTEGER_SSE | CPUF_SSE2);

	// Resizers that stretch the block-sized fields to luma / chroma frame size.
	upsizer = new SimpleResize(nWidth, nHeight, nBlkX, nBlkY, CPUF_Resize);
	upsizerUV = new SimpleResize(nWidthUV, nHeightUV, nBlkX, nBlkY, CPUF_Resize);

	// YUY2 input is processed through temporary planar copies.
	if ( (pixelType & VideoInfo::CS_YUY2) == VideoInfo::CS_YUY2 )
	{
		RefPlanes =  new YUY2Planes(nWidth, nHeight);
		DstPlanes =  new YUY2Planes(nWidth, nHeight);
	}

}

// Releases every buffer and helper object allocated by the constructor.
// Buffers created with new[] are released with delete[]; using scalar
// delete on them is undefined behaviour.
MVFlowBlur::~MVFlowBlur()
{
	// The planar YUY2 helpers exist only for YUY2 clips.
	if ( (pixelType & VideoInfo::CS_YUY2) == VideoInfo::CS_YUY2 )
	{
		delete RefPlanes;
		delete DstPlanes;
	}

	// Single objects: scalar delete.
	delete upsizer;
	delete upsizerUV;

	// Backward vector fields.
	delete [] VXFullYB;
	delete [] VXFullUVB;
	delete [] VYFullYB;
	delete [] VYFullUVB;
	delete [] VXSmallYB;
	delete [] VYSmallYB;
	delete [] VXSmallUVB;
	delete [] VYSmallUVB;

	// Forward vector fields.
	delete [] VXFullYF;
	delete [] VXFullUVF;
	delete [] VYFullYF;
	delete [] VYFullUVF;
	delete [] VXSmallYF;
	delete [] VYSmallYF;
	delete [] VXSmallUVF;
	delete [] VYSmallUVF;

	// Mask buffers.
	delete [] MaskSmallB;
	delete [] MaskFullYB;
	delete [] MaskFullUVB;
	delete [] MaskSmallF;
	delete [] MaskFullYF;
	delete [] MaskFullUVF;

	// The merged sub-pixel planes are allocated only when nPel > 1.
	if (nPel>1)
	{
		delete [] pel2PlaneYB;
		delete [] pel2PlaneUB;
		delete [] pel2PlaneVB;
	}

}


void MVFlowBlur::FlowBlur(BYTE * pdst, int dst_pitch, const BYTE *pref, int ref_pitch,  
			   BYTE *VXFullB, BYTE *VXFullF, BYTE *VYFullB, BYTE *VYFullF, 
			   int VPitch, int width, int height, int blurmax256, int prec)
{
	// very slow, but precise motion blur
	if (nPel==1)
	{
		for (int h=0; h<height; h++)
		{
			for (int w=0; w<width; w++)
			{
				int bluredsum = pref[w];
				int vxF0 = ((VXFullF[w]-128)*blur256);
				int vyF0 = ((VYFullF[w]-128)*blur256);
				int mF = (max(abs(vxF0), abs(vyF0))/prec)>>8;
				if (mF>0)
				{
					vxF0 /= mF;
					vyF0 /= mF;
					int vxF = vxF0;
					int vyF = vyF0;
					for (int i=0; i<mF; i++)
					{
						int dstF = pref[(vyF>>8)*ref_pitch + (vxF>>8) + w]; 
						bluredsum += dstF;
						vxF += vxF0;
						vyF += vyF0;
					}
				}
				int vxB0 = ((VXFullB[w]-128)*blur256);
				int vyB0 = ((VYFullB[w]-128)*blur256);
				int mB = (max(abs(vxB0), abs(vyB0))/prec)>>8;
				if (mB>0)
				{
					vxB0 /= mB;
					vyB0 /= mB;
					int vxB = vxB0;
					int vyB = vyB0;
					for (int i=0; i<mB; i++)
					{
						int dstB = pref[(vyB>>8)*ref_pitch + (vxB>>8) + w]; 
						bluredsum += dstB;
						vxB += vxB0;
						vyB += vyB0;
					}
				}
				pdst[w] = bluredsum/(mF+mB+1);
			}
			pdst += dst_pitch;
			pref += ref_pitch;
			VXFullB += VPitch;
			VYFullB += VPitch;
			VXFullF += VPitch;
			VYFullF += VPitch;
		}
	}
	else if (nPel==2)
	{
		for (int h=0; h<height; h++)
		{
			for (int w=0; w<width; w++)
			{
				int bluredsum = pref[w<<1];
				int vxF0 = ((VXFullF[w]-128)*blur256);
				int vyF0 = ((VYFullF[w]-128)*blur256);
				int mF = (max(abs(vxF0), abs(vyF0))/prec)>>8;
				if (mF>0)
				{
					vxF0 /= mF;
					vyF0 /= mF;
					int vxF = vxF0;
					int vyF = vyF0;
					for (int i=0; i<mF; i++)
					{
						int dstF = pref[(vyF>>8)*ref_pitch + (vxF>>8) + (w<<1)]; 
						bluredsum += dstF;
						vxF += vxF0;
						vyF += vyF0;
					}
				}
				int vxB0 = ((VXFullB[w]-128)*blur256);
				int vyB0 = ((VYFullB[w]-128)*blur256);
				int mB = (max(abs(vxB0), abs(vyB0))/prec)>>8;
				if (mB>0)
				{
					vxB0 /= mB;
					vyB0 /= mB;
					int vxB = vxB0;
					int vyB = vyB0;
					for (int i=0; i<mB; i++)
					{
						int dstB = pref[(vyB>>8)*ref_pitch + (vxB>>8) + (w<<1)]; 
						bluredsum += dstB;
						vxB += vxB0;
						vyB += vyB0;
					}
				}
				pdst[w] = bluredsum/(mF+mB+1);
			}
			pdst += dst_pitch;
			pref += (ref_pitch<<1);
			VXFullB += VPitch;
			VYFullB += VPitch;
			VXFullF += VPitch;
			VYFullF += VPitch;
		}
	}
}
//-------------------------------------------------------------------------
// Produces output frame n.
//
// If either vector clip is not usable for this frame, the source frame is
// returned untouched. Otherwise the block vectors of both directions are
// expanded to per-pixel fields and each plane of a new frame is filled by
// FlowBlur, sampling from the padded (and, for nPel==2, merged) reference
// planes of frame n.
PVideoFrame __stdcall MVFlowBlur::GetFrame(int n, IScriptEnvironment* env)
{
	PVideoFrame src = child->GetFrame(n, env);

	// integer offset of reference frame
	const int delta = mvClipB.GetDeltaFrame();

	mvClipB.Update(n - delta, env); // backward from current to prev
	mvClipF.Update(n + delta, env); // forward from current to next

	// Pass the frame through when vectors of either direction are unusable.
	if ( !mvClipB.IsUsable() || !mvClipF.IsUsable() )
		return src;

	const bool isYUY2 = ( (pixelType & VideoInfo::CS_YUY2) == VideoInfo::CS_YUY2 );

	// ref for compensation
	// NOTE(review): this is the same frame n that 'src' already holds.
	PVideoFrame ref = child->GetFrame(n, env);
	PVideoFrame dst = env->NewVideoFrame(vi);

	MVFrames *pFrames = mvCore->GetFrames(nIdx);
	MVGroupOfFrames *pRefGOFB = pFrames->GetFrame(n); // backward ref

	BYTE *pDst[3];
	const BYTE *pRef[3];
	int nDstPitches[3], nRefPitches[3];
	unsigned char *pDstYUY2 = 0; // only used on the YUY2 path
	int nDstPitchYUY2 = 0;

	if (isYUY2)
	{
		// Packed input: unpack the reference into temporary planes.
		const unsigned char *pRefPacked = ref->GetReadPtr();
		int nRefPitchPacked = ref->GetPitch();
		pRef[0] = RefPlanes->GetPtr();
		pRef[1] = RefPlanes->GetPtrU();
		pRef[2] = RefPlanes->GetPtrV();
		nRefPitches[0] = RefPlanes->GetPitch();
		nRefPitches[1] = RefPlanes->GetPitchUV();
		nRefPitches[2] = RefPlanes->GetPitchUV();
		YUY2ToPlanes(pRefPacked, nRefPitchPacked, nWidth, nHeight,
			pRef[0], nRefPitches[0], pRef[1], pRef[2], nRefPitches[1], isse);

		// The result is built in temporary planes and packed at the very end.
		// NOTE(review): dst was just allocated, so this unpack looks redundant - confirm.
		pDstYUY2 = dst->GetWritePtr();
		nDstPitchYUY2 = dst->GetPitch();
		pDst[0] = DstPlanes->GetPtr();
		pDst[1] = DstPlanes->GetPtrU();
		pDst[2] = DstPlanes->GetPtrV();
		nDstPitches[0] = DstPlanes->GetPitch();
		nDstPitches[1] = DstPlanes->GetPitchUV();
		nDstPitches[2] = DstPlanes->GetPitchUV();
		YUY2ToPlanes(pDstYUY2, nDstPitchYUY2, nWidth, nHeight,
			pDst[0], nDstPitches[0], pDst[1], pDst[2], nDstPitches[1], isse);
	}
	else
	{
		// Planar input: work directly on the frame planes.
		pDst[0] = YWPLAN(dst);
		pDst[1] = UWPLAN(dst);
		pDst[2] = VWPLAN(dst);
		nDstPitches[0] = YPITCH(dst);
		nDstPitches[1] = UPITCH(dst);
		nDstPitches[2] = VPITCH(dst);

		pRef[0] = YRPLAN(ref);
		pRef[1] = URPLAN(ref);
		pRef[2] = VRPLAN(ref);
		nRefPitches[0] = YPITCH(ref);
		nRefPitches[1] = UPITCH(ref);
		nRefPitches[2] = VPITCH(ref);
	}

	// Load the reference into the frame group, then pad and refine it.
	pRefGOFB->SetPlane(pRef[0], nRefPitches[0], YPLANE);
	pRefGOFB->SetPlane(pRef[1], nRefPitches[1], UPLANE);
	pRefGOFB->SetPlane(pRef[2], nRefPitches[2], VPLANE);
	pRefGOFB->Pad(YUVPLANES);
	pRefGOFB->Refine(YUVPLANES);

	MVPlane *pPlanesB[3];
	pPlanesB[0] = pRefGOFB->GetFrame(0)->GetPlane(YPLANE);
	pPlanesB[1] = pRefGOFB->GetFrame(0)->GetPlane(UPLANE);
	pPlanesB[2] = pRefGOFB->GetFrame(0)->GetPlane(VPLANE);

	if (nPel == 2)
	{
		// merge refined planes to big single plane
		BYTE *const mergedPlane[3] = { pel2PlaneYB, pel2PlaneUB, pel2PlaneVB };
		const int mergedPitch[3] = { pel2PitchY, pel2PitchUV, pel2PitchUV };
		for (int p = 0; p < 3; p++)
		{
			MVPlane *plane = pPlanesB[p];
			Merge4PlanesToBig(mergedPlane[p], mergedPitch[p], plane->GetAbsolutePointer(0,0),
				plane->GetAbsolutePointer(1,0), plane->GetAbsolutePointer(0,1),
				plane->GetAbsolutePointer(1,1), plane->GetExtendedWidth(),
				plane->GetExtendedHeight(), plane->GetPitch(), isse);
		}
	}

	// Make the small (one value per block) vx and vy masks.
	// 1. ATTENTION: vectors are assumed SHORT (|vx|, |vy| < 127) !
	// 2. they will be zeroed if not
	// 3. 128 is added to all values
	MakeVectorSmallMasks(mvClipB, nBlkX, nBlkY, VXSmallYB, nBlkX, VYSmallYB, nBlkX);
	VectorSmallMaskYToHalfUV(VXSmallYB, nBlkX, nBlkY, VXSmallUVB, 2);
	VectorSmallMaskYToHalfUV(VYSmallYB, nBlkX, nBlkY, VYSmallUVB, yRatioUV);

	MakeVectorSmallMasks(mvClipF, nBlkX, nBlkY, VXSmallYF, nBlkX, VYSmallYF, nBlkX);
	VectorSmallMaskYToHalfUV(VXSmallYF, nBlkX, nBlkY, VXSmallUVF, 2);
	VectorSmallMaskYToHalfUV(VYSmallYF, nBlkX, nBlkY, VYSmallUVF, yRatioUV);

	// upsize (bilinear interpolate) vector masks to fullframe size;
	// use luma plane resizer code for all planes if we resize from luma small mask
	const int resizePlane = PLANAR_Y;
	upsizer->SimpleResizeDo(VXFullYB, nWidth, nHeight, VPitchY, VXSmallYB, nBlkX, nBlkX, resizePlane);
	upsizer->SimpleResizeDo(VYFullYB, nWidth, nHeight, VPitchY, VYSmallYB, nBlkX, nBlkX, resizePlane);
	upsizerUV->SimpleResizeDo(VXFullUVB, nWidthUV, nHeightUV, VPitchUV, VXSmallUVB, nBlkX, nBlkX, resizePlane);
	upsizerUV->SimpleResizeDo(VYFullUVB, nWidthUV, nHeightUV, VPitchUV, VYSmallUVB, nBlkX, nBlkX, resizePlane);

	upsizer->SimpleResizeDo(VXFullYF, nWidth, nHeight, VPitchY, VXSmallYF, nBlkX, nBlkX, resizePlane);
	upsizer->SimpleResizeDo(VYFullYF, nWidth, nHeight, VPitchY, VYSmallYF, nBlkX, nBlkX, resizePlane);
	upsizerUV->SimpleResizeDo(VXFullUVF, nWidthUV, nHeightUV, VPitchUV, VXSmallUVF, nBlkX, nBlkX, resizePlane);
	upsizerUV->SimpleResizeDo(VYFullUVF, nWidthUV, nHeightUV, VPitchUV, VYSmallUVF, nBlkX, nBlkX, resizePlane);

	// Per-plane tables: luma first, then U and V, which share the chroma fields.
	BYTE *const vxBack[3] = { VXFullYB, VXFullUVB, VXFullUVB };
	BYTE *const vxFwd[3]  = { VXFullYF, VXFullUVF, VXFullUVF };
	BYTE *const vyBack[3] = { VYFullYB, VYFullUVB, VYFullUVB };
	BYTE *const vyFwd[3]  = { VYFullYF, VYFullUVF, VYFullUVF };
	const int vecPitch[3] = { VPitchY, VPitchUV, VPitchUV };
	const int planeWidth[3]  = { nWidth, nWidthUV, nWidthUV };
	const int planeHeight[3] = { nHeight, nHeightUV, nHeightUV };

	if (nPel == 2)
	{
		// Sample from the merged planes, skipping their padding border.
		BYTE *const mergedPlane[3] = { pel2PlaneYB, pel2PlaneUB, pel2PlaneVB };
		const int mergedOffset[3] = { pel2OffsetY, pel2OffsetUV, pel2OffsetUV };
		const int mergedPitch[3] = { pel2PitchY, pel2PitchUV, pel2PitchUV };
		for (int p = 0; p < 3; p++)
		{
			FlowBlur(pDst[p], nDstPitches[p], mergedPlane[p] + mergedOffset[p], mergedPitch[p],
				vxBack[p], vxFwd[p], vyBack[p], vyFwd[p], vecPitch[p],
				planeWidth[p], planeHeight[p], blur256, prec);
		}
	}
	else if (nPel == 1)
	{
		// Sample straight from the padded reference planes.
		for (int p = 0; p < 3; p++)
		{
			FlowBlur(pDst[p], nDstPitches[p], pPlanesB[p]->GetPointer(0,0), pPlanesB[p]->GetPitch(),
				vxBack[p], vxFwd[p], vyBack[p], vyFwd[p], vecPitch[p],
				planeWidth[p], planeHeight[p], blur256, prec);
		}
	}

	if (isYUY2)
	{
		// Pack the temporary planes back into the output frame.
		YUY2FromPlanes(pDstYUY2, nDstPitchYUY2, nWidth, nHeight,
			pDst[0], nDstPitches[0], pDst[1], pDst[2], nDstPitches[1], isse);
	}

	return dst;
}