#ifndef __COPYCODE_H__
#define __COPYCODE_H__

#include <memory.h>
#include "MVInterface.h"

// General-purpose copy / fill helpers. Only the declarations are visible in
// this header; the behavioural notes below are inferred from the signatures
// and should be confirmed against the definitions.

// Presumably copies `height` rows of `row_size` bytes from srcp to dstp, the
// rows being src_pitch / dst_pitch apart; `isse` presumably selects the ISSE
// code path (see asm_BitBlt_ISSE) -- TODO confirm.
void BitBlt(unsigned char* dstp, int dst_pitch, const unsigned char* srcp, int src_pitch, int row_size, int height, bool isse);
// Takes the same arguments as BitBlt, minus the trailing `isse` flag.
void asm_BitBlt_ISSE(unsigned char* dstp, int dst_pitch, const unsigned char* srcp, int src_pitch, int row_size, int height);
// Same parameter list as memcpy (dest, src, byte count) but returns void.
// Presumably an AMD-tuned memcpy replacement -- TODO confirm.
void memcpy_amd(void *dest, const void *src, size_t n);
// Presumably fills a width x height zone located at (offsetX, offsetY) inside
// a buffer with row pitch `pitch` with the byte `value` -- TODO confirm.
void MemZoneSet(unsigned char *ptr, unsigned char value, int width,
				int height, int offsetX, int offsetY, int pitch);

// Function type (not a pointer type) describing a block-copy routine that
// takes (destination, destination pitch, source, source pitch) and returns
// nothing. A pointer to such a routine is spelled `COPYFunction *`.
// NOTE(review): the Copy_* routines in this header are declared with BYTE*
// rather than unsigned char*; they only match this type if BYTE is
// unsigned char -- confirm against the header that defines BYTE.
typedef void (COPYFunction)(unsigned char *pDst, int nDstPitch,
                            const unsigned char *pSrc, int nSrcPitch);

// Plain C++ block copy.
//
// Copies nBlkHeight rows of nBlkWidth bytes each from pSrc to pDst. After
// every row the destination cursor advances by nDstPitch and the source
// cursor by nSrcPitch.
//
// Each row is moved with a single memcpy; an earlier per-byte inner loop was
// removed as wasted cycles (Fizick, v1.2).
template<int nBlkWidth, int nBlkHeight>
void Copy_C(BYTE *pDst, int nDstPitch, const BYTE *pSrc, int nSrcPitch)
{
   BYTE *pDstRow = pDst;
   const BYTE *pSrcRow = pSrc;

   for ( int nRowsLeft = nBlkHeight; nRowsLeft > 0; --nRowsLeft )
   {
      memcpy(pDstRow, pSrcRow, nBlkWidth);
      pDstRow += nDstPitch;
      pSrcRow += nSrcPitch;
   }
}

// Square-block convenience overload: Copy_C<N> forwards its arguments
// unchanged to the two-parameter Copy_C<N, N>, i.e. width and height are both
// nBlkSize.
template<int nBlkSize>
void Copy_C(BYTE *pDst, int nDstPitch, const BYTE *pSrc, int nSrcPitch)
{
   const int nSide = nBlkSize;   // side length shared by width and height
   Copy_C<nSide, nSide>(pDst, nDstPitch, pSrc, nSrcPitch);
}

// MMX tile-copy primitives, implemented outside this header (extern "C",
// __cdecl). They are declared ahead of Copy_mmx because the calls inside the
// template do not depend on its template parameters, so a conforming compiler
// looks these names up at the point where the template is defined rather than
// where it is instantiated.
// NOTE(review): Copy_mmx below assumes that CopyN_mmx copies an N x N tile
// (the stacking of 8x8 tiles at rows j and j + 8 inside a 16-row band implies
// it) -- confirm against the assembly source.
extern "C" void __cdecl Copy16_mmx(BYTE *pDst, int nDstPitch,
                                   const BYTE *pSrc, int nSrcPitch);

extern "C" void __cdecl Copy8_mmx(BYTE *pDst, int nDstPitch,
                                  const BYTE *pSrc, int nSrcPitch);

extern "C" void __cdecl Copy4_mmx(BYTE *pDst, int nDstPitch,
                                  const BYTE *pSrc, int nSrcPitch);

extern "C" void __cdecl Copy2_mmx(BYTE *pDst, int nDstPitch,
                                  const BYTE *pSrc, int nSrcPitch);

// NOTE(review): presumably rectangular 4x8 and 8x16 variants; they are not
// used by Copy_mmx and their exact geometry is not visible from this header.
extern "C" void __cdecl Copy4x8_mmx(BYTE *pDst, int nDstPitch,
                                  const BYTE *pSrc, int nSrcPitch);

extern "C" void __cdecl Copy8x16_mmx(BYTE *pDst, int nDstPitch,
                                  const BYTE *pSrc, int nSrcPitch);

// Copies an nBlkWidth x nBlkHeight block from pSrc to pDst by tiling it with
// the square MMX primitives declared above.
//
// The block is cut into horizontal bands of 16, 8, 4 and then 2 rows. Inside
// a band of height B the columns are covered left to right with the widest
// tile that still fits (at most B wide); a tile narrower than the band is
// repeated downwards until the band height is filled.
//
// pDst / nDstPitch : destination top-left corner and its row pitch
// pSrc / nSrcPitch : source top-left corner and its row pitch
//
// Every loop only issues a tile that fits completely inside the block
// (`pos + size <= limit`). The previous `pos < limit` conditions let the
// 16-pixel loops run past the block edge for sizes that are not a multiple of
// 16 and left the smaller-tile loops unreachable. The narrow tiles in the
// 8- and 4-row bands are now stacked vertically, as in the 16-row band,
// instead of being stepped sideways beyond the block width.
//
// Both dimensions are expected to be even: the smallest tile is 2x2, so a
// trailing odd row or column is not copied.
template<int nBlkWidth, int nBlkHeight>
void Copy_mmx(BYTE *pDst, int nDstPitch, const BYTE *pSrc, int nSrcPitch)
{
   int j = 0;

   // Bands of 16 rows.
   for ( ; j + 16 <= nBlkHeight; j += 16 )
   {
      int i = 0;
      for ( ; i + 16 <= nBlkWidth; i += 16 )
         Copy16_mmx(pDst + i + j * nDstPitch, nDstPitch, pSrc + i + j * nSrcPitch, nSrcPitch);
      for ( ; i + 8 <= nBlkWidth; i += 8 )
         for ( int k = j; k < j + 16; k += 8 )
            Copy8_mmx(pDst + i + k * nDstPitch, nDstPitch, pSrc + i + k * nSrcPitch, nSrcPitch);
      for ( ; i + 4 <= nBlkWidth; i += 4 )
         for ( int k = j; k < j + 16; k += 4 )
            Copy4_mmx(pDst + i + k * nDstPitch, nDstPitch, pSrc + i + k * nSrcPitch, nSrcPitch);
      for ( ; i + 2 <= nBlkWidth; i += 2 )
         for ( int k = j; k < j + 16; k += 2 )
            Copy2_mmx(pDst + i + k * nDstPitch, nDstPitch, pSrc + i + k * nSrcPitch, nSrcPitch);
   }

   // At most one band of 8 rows remains after the 16-row bands.
   for ( ; j + 8 <= nBlkHeight; j += 8 )
   {
      int i = 0;
      for ( ; i + 8 <= nBlkWidth; i += 8 )
         Copy8_mmx(pDst + i + j * nDstPitch, nDstPitch, pSrc + i + j * nSrcPitch, nSrcPitch);
      for ( ; i + 4 <= nBlkWidth; i += 4 )
         for ( int k = j; k < j + 8; k += 4 )
            Copy4_mmx(pDst + i + k * nDstPitch, nDstPitch, pSrc + i + k * nSrcPitch, nSrcPitch);
      for ( ; i + 2 <= nBlkWidth; i += 2 )
         for ( int k = j; k < j + 8; k += 2 )
            Copy2_mmx(pDst + i + k * nDstPitch, nDstPitch, pSrc + i + k * nSrcPitch, nSrcPitch);
   }

   // Then at most one band of 4 rows.
   for ( ; j + 4 <= nBlkHeight; j += 4 )
   {
      int i = 0;
      for ( ; i + 4 <= nBlkWidth; i += 4 )
         Copy4_mmx(pDst + i + j * nDstPitch, nDstPitch, pSrc + i + j * nSrcPitch, nSrcPitch);
      for ( ; i + 2 <= nBlkWidth; i += 2 )
         for ( int k = j; k < j + 4; k += 2 )
            Copy2_mmx(pDst + i + k * nDstPitch, nDstPitch, pSrc + i + k * nSrcPitch, nSrcPitch);
   }

   // Finally at most one band of 2 rows.
   for ( ; j + 2 <= nBlkHeight; j += 2 )
   {
      for ( int i = 0; i + 2 <= nBlkWidth; i += 2 )
         Copy2_mmx(pDst + i + j * nDstPitch, nDstPitch, pSrc + i + j * nSrcPitch, nSrcPitch);
   }
}

#endif