// Create an overlay mask with the motion vectors

// See legal notice in Copying.txt for more information

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
// http://www.gnu.org/copyleft/gpl.html .

#include "MaskFun.h"
#include <math.h>
#include <memory.h>


// Build a per-block occlusion mask from the motion vectors stored in mvClip.
//
// For each block the horizontal vector component is compared with the left and
// right neighbours and the vertical component with the top and bottom
// neighbours. Only the positive differences listed below are accumulated:
//   left.x  > cur.x   adds left.x  - cur.x
//   right.x < cur.x   adds cur.x   - right.x
//   top.y   > cur.y   adds top.y   - cur.y
//   bottom.y< cur.y   adds cur.y   - bottom.y
// The sum is scaled by occnorm = 10*dMaskNormFactor/nPel, optionally raised to
// the power fGamma, multiplied by 255 and stored saturated to 0..255.
//
// Parameters:
//   mvClip          - vector source; blocks are read with GetBlock(0, index)
//                     (the meaning of the first argument is defined by MVClip)
//   nBlkX, nBlkY    - number of blocks per row / number of block rows; blocks
//                     are indexed row-major as bx + by*nBlkX
//   dMaskNormFactor - normalisation factor (1/ml according to the note below)
//   fGamma          - exponent applied to the normalised value; exactly 1.0
//                     takes the linear path without calling pow()
//   nPel            - divisor of the normalisation factor
//                     (presumably the sub-pixel precision - confirm with caller)
//   occMask         - output buffer, one byte per block
//   occMaskPitch    - distance in bytes between consecutive rows of occMask
void MakeVectorOcclusionMask(MVClip &mvClip, int nBlkX, int nBlkY, double dMaskNormFactor, double fGamma, int nPel, BYTE * occMask, int occMaskPitch)
{ // analyse vectors field to detect occlusion
	// note: dMaskNormFactor = 1/ml
	const double occnorm = 10* dMaskNormFactor/nPel; // empirical
	for (int by=0; by<nBlkY; by++)
	{
		for (int bx=0; bx<nBlkX; bx++)
		{
			int occlusion = 0;
			const int i = bx + by*nBlkX; // current block
			const FakeBlockData &block = mvClip.GetBlock(0, i);
			const int vx = block.GetMV().x;
			const int vy = block.GetMV().y;
			if (bx > 0) // left neighbor
			{
				const FakeBlockData &block1 = mvClip.GetBlock(0, i-1);
				const int vx1 = block1.GetMV().x;
				if (vx1>vx) occlusion += vx1-vx; // only positive (abs)
			}
			if (bx < nBlkX-1) // right neighbor
			{
				const FakeBlockData &block1 = mvClip.GetBlock(0, i+1);
				const int vx1 = block1.GetMV().x;
				if (vx1<vx) occlusion += vx-vx1;
			}
			if (by > 0) // top neighbor
			{
				const FakeBlockData &block1 = mvClip.GetBlock(0, i - nBlkX);
				const int vy1 = block1.GetMV().y;
				if (vy1>vy) occlusion += vy1-vy;
			}
			if (by < nBlkY-1) // bottom neighbor
			{
				const FakeBlockData &block1 = mvClip.GetBlock(0, i + nBlkX);
				const int vy1 = block1.GetMV().y;
				if (vy1<vy) occlusion += vy-vy1;
			}

			double scaled;
			if (fGamma == 1.0)
				scaled = 255.0 * occlusion * occnorm;
			else
				scaled = 255.0 * pow(occlusion*occnorm, fGamma);

			// Saturate while the value is still a double: converting a double
			// that does not fit into int (or a NaN) is undefined behaviour and
			// in practice turned very strong occlusion into 0 instead of 255.
			BYTE level;
			if (scaled >= 255.0)
				level = 255;
			else if (scaled > 0.0)
				level = static_cast<BYTE>(static_cast<int>(scaled));
			else
				level = 0; // zero, negative or NaN
			occMask[bx + by*occMaskPitch] = level;
		}
	}
}

// Build a per-block occlusion mask from two byte masks holding the x and y
// vector components of every block.
//
// VXMask, VYMask and occMask are all addressed as bx + by*nBlkX, i.e. they are
// treated as tightly packed nBlkX * nBlkY arrays. For each block only the
// positive differences listed below are accumulated:
//   left.x  > cur.x   adds left.x  - cur.x
//   right.x < cur.x   adds cur.x   - right.x
//   top.y   > cur.y   adds top.y   - cur.y
//   bottom.y< cur.y   adds cur.y   - bottom.y
// The sum is scaled by occnorm = 10*dMaskNormFactor/nPel, optionally raised to
// the power fGamma, multiplied by 255 and stored saturated to 0..255.
//
// Parameters:
//   VXMask, VYMask  - input component masks, one byte per block
//   nBlkX, nBlkY    - number of blocks per row / number of block rows
//   dMaskNormFactor - normalisation factor (1/ml according to the note below)
//   fGamma          - exponent applied to the normalised value; exactly 1.0
//                     takes the linear path without calling pow()
//   nPel            - divisor of the normalisation factor
//                     (presumably the sub-pixel precision - confirm with caller)
//   occMask         - output buffer, one byte per block
void VectorMasksToOcclusionMask(BYTE *VXMask, BYTE *VYMask, int nBlkX, int nBlkY, double dMaskNormFactor, double fGamma, int nPel, BYTE * occMask)
{ // analyse vectors field to detect occlusion
	// note: dMaskNormFactor = 1/ml
	const double occnorm = 10* dMaskNormFactor/nPel; // empirical
	for (int by=0; by<nBlkY; by++)
	{
		for (int bx=0; bx<nBlkX; bx++)
		{
			int occlusion = 0;
			const int i = bx + by*nBlkX; // current block
			const int vx = VXMask[i];
			const int vy = VYMask[i];
			if (bx > 0) // left neighbor
			{
				const int vx1 = VXMask[i-1];
				if (vx1>vx) occlusion += vx1-vx; // only positive (abs)
			}
			if (bx < nBlkX-1) // right neighbor
			{
				const int vx1 = VXMask[i+1];
				if (vx1<vx) occlusion += vx-vx1;
			}
			if (by > 0) // top neighbor
			{
				const int vy1 = VYMask[i - nBlkX];
				if (vy1>vy) occlusion += vy1-vy;
			}
			if (by < nBlkY-1) // bottom neighbor
			{
				const int vy1 = VYMask[i + nBlkX];
				if (vy1<vy) occlusion += vy-vy1;
			}

			double scaled;
			if (fGamma == 1.0)
				scaled = 255.0 * occlusion * occnorm;
			else
				scaled = 255.0 * pow(occlusion*occnorm, fGamma);

			// Saturate while the value is still a double: converting a double
			// that does not fit into int (or a NaN) is undefined behaviour and
			// in practice turned very strong occlusion into 0 instead of 255.
			BYTE level;
			if (scaled >= 255.0)
				level = 255;
			else if (scaled > 0.0)
				level = static_cast<BYTE>(static_cast<int>(scaled));
			else
				level = 0; // zero, negative or NaN
			occMask[i] = level;
		}
	}
}

// Store the x and y components of every block vector as bytes in two masks.
//
// - Components are biased by +128 before being stored.
// - A vector is kept only while both |vx| <= 127 and |vy| <= 127; otherwise
//   both components are replaced by zero (stored as 128).
// - Blocks are read row-major from mvClip as index bx + by*nBlkX; each mask is
//   written with its own row pitch.
void MakeVectorSmallMasks(MVClip &mvClip, int nBlkX, int nBlkY, BYTE *VXSmallY, int pitchVXSmallY, BYTE*VYSmallY, int pitchVYSmallY)
{
	for (int row = 0; row < nBlkY; ++row)
	{
		BYTE *dstX = VXSmallY + row*pitchVXSmallY; // start of this row in the x mask
		BYTE *dstY = VYSmallY + row*pitchVYSmallY; // start of this row in the y mask
		const int rowBase = row*nBlkX;             // index of the first block of the row

		for (int col = 0; col < nBlkX; ++col)
		{
			const FakeBlockData &blk = mvClip.GetBlock(0, rowBase + col);
			int mvx = blk.GetMV().x;
			int mvy = blk.GetMV().y;

			const bool fitsInByte = (abs(mvx) <= 127) && (abs(mvy) <= 127);
			if (!fitsInByte)
			{
				// too long to be represented: drop the whole vector
				mvx = 0;
				mvy = 0;
			}

			dstX[col] = mvx + 128; // luma
			dstY[col] = mvy + 128; // luma
		}
	}
}

// Derive a chroma vector mask from a luma vector mask of the same dimensions.
//
// Both buffers are walked as tightly packed rows of nBlkX bytes, nBlkY rows.
//   ratioUV == 2 : the value is un-biased (-128), shifted right by one bit and
//                  biased again (+128)  (YV12 case in the original comments)
//   otherwise    : the value is copied unchanged (YUY2 case)
void VectorSmallMaskYToHalfUV(BYTE * VSmallY, int nBlkX, int nBlkY, BYTE *VSmallUV, int ratioUV)
{
	const bool halveVectors = (ratioUV == 2);

	const BYTE *srcRow = VSmallY;
	BYTE *dstRow = VSmallUV;

	for (int row = 0; row < nBlkY; ++row)
	{
		for (int col = 0; col < nBlkX; ++col)
		{
			const int biased = srcRow[col];
			if (halveVectors)
				dstRow[col] = ((biased - 128) >> 1) + 128; // chroma
			else
				dstRow[col] = biased; // chroma
		}
		// both masks use nBlkX as their row stride
		srcRow += nBlkX;
		dstRow += nBlkX;
	}
}


// Interleave four width x height planes into one plane of 2*width x 2*height.
//
// For every source row h the destination receives two rows:
//   row 2h   : pPlane0[0], pPlane1[0], pPlane0[1], pPlane1[1], ...
//   row 2h+1 : pPlane2[0], pPlane3[0], pPlane2[1], pPlane3[1], ...
//
// Parameters:
//   pel2Plane, pel2Pitch - destination plane and its row pitch in bytes
//   pPlane0 .. pPlane3   - the four source planes
//   width, height        - size of each source plane
//   pitch                - row pitch shared by all four source planes
//   isse                 - false: portable C++ loops; true: MMX inline assembly
//
// Notes on the assembly path (visible from the code below):
//   - it is 32-bit x86 MSVC-style inline assembly;
//   - it handles 4 source pixels per iteration (4 bytes read from each plane,
//     8 bytes written), so when width is not a multiple of 4 it reads and
//     writes up to the next multiple of 4;
//   - both the column and the row loops test their condition at the end, so
//     the body runs at least once even if width or height is not positive.
void Merge4PlanesToBig(BYTE *pel2Plane, int pel2Pitch, const BYTE *pPlane0, const BYTE *pPlane1, 
					  const BYTE *pPlane2, const BYTE * pPlane3, int width, int height, int pitch, bool isse)
{
	// copy refined planes to big one plane
	if (!isse) 
	{
		for (int h=0; h<height; h++)
		{
			// even destination row: plane0 / plane1 interleaved
			for (int w=0; w<width; w++)
			{
				pel2Plane[w<<1] = pPlane0[w];
				pel2Plane[(w<<1) +1] = pPlane1[w];
			}
			pel2Plane += pel2Pitch;
			// odd destination row: plane2 / plane3 interleaved
			for (int w=0; w<width; w++)
			{
				pel2Plane[w<<1] = pPlane2[w];
				pel2Plane[(w<<1) +1] = pPlane3[w];
			}
			pel2Plane += pel2Pitch;
			pPlane0 += pitch;
			pPlane1 += pitch;
			pPlane2 += pitch;
			pPlane3 += pitch;
		}
	}
	else // isse - not very optimized
	{
		_asm 
		{
			// save the registers used below
			push ebx;
			push esi;
			push edi;
			// ---- pass 1: plane0/plane1 -> even destination rows ----
			// edi = destination row, esi = plane0 row, edx = plane1 row
			mov edi, pel2Plane;
			mov esi, pPlane0;
			mov edx, pPlane1;

			// ebx = remaining rows
			mov ebx, height;
looph1:
			// ecx = width, eax = source column offset
			mov ecx, width;
			mov eax, 0;
align 16
loopw1:
			// load 4 bytes from each plane and interleave them into 8 bytes
			movd mm0, [esi+eax];
			movd mm1, [edx+eax];
			punpcklbw mm0, mm1;
			// destination offset is twice the source offset
			shl eax, 1;
			movq [edi+eax], mm0;
			shr eax, 1;
			add eax, 4;
			cmp eax, ecx;
			jl loopw1;

			// advance destination by two rows, sources by one row
			mov eax, pel2Pitch;
			add edi, eax;
			add edi, eax;
			mov eax, pitch;
			add esi, eax;
			add edx, eax;
			dec ebx;
			cmp ebx, 0;
			jg looph1;

			// ---- pass 2: plane2/plane3 -> odd destination rows ----
			mov edi, pel2Plane;
			mov esi, pPlane2;
			mov edx, pPlane3;

			// start at the second destination row
			mov eax, pel2Pitch;
			add edi, eax;

			mov ebx, height;
looph2:
			mov ecx, width;
			mov eax, 0;
align 16
loopw2:
			// same 4-pixel interleave as in pass 1
			movd mm0, [esi+eax];
			movd mm1, [edx+eax];
			punpcklbw mm0, mm1;
			shl eax, 1;
			movq [edi+eax], mm0;
			shr eax, 1;
			add eax, 4;
			cmp eax, ecx;
			jl loopw2;

			// advance destination by two rows, sources by one row
			mov eax, pel2Pitch;
			add edi, eax;
			add edi, eax;
			mov eax, pitch;
			add esi, eax;
			add edx, eax;
			dec ebx;
			cmp ebx, 0;
			jg looph2;

			// restore saved registers and leave MMX state
			pop edi;
			pop esi;
			pop ebx;
			emms;

		}
	}
}
