Serious-Engine/Sources/Engine/Graphics/TextureEffects.cpp
Ryan C. Gordon 24cb244d43 First attempt to hand-merge Ryan's Linux and Mac OS X port.
This was a _ton_ of changes, made 15 years ago, so there are probably some
problems to work out still.

Among others: Engine/Base/Stream.* was mostly abandoned and will need to be
re-ported.

Still, this is a pretty good start, and probably holds a world record for
lines of changes or something.  :)
2016-03-28 23:46:13 -04:00

3437 lines
114 KiB
C++

/* Copyright (c) 2002-2012 Croteam Ltd. All rights reserved. */
#include "Engine/StdH.h"
#include <Engine/Graphics/Texture.h>
#include <Engine/Graphics/TextureEffects.h>
#include <Engine/Math/Functions.h>
#include <Engine/Base/Timer.h>
#include <Engine/Base/Statistics_Internal.h>
#include <Engine/Templates/DynamicArray.cpp>
#include <Engine/Templates/Stock_CTextureData.h>
#include <Engine/Templates/StaticArray.cpp>
// asm shortcuts
#define O offset
#define Q qword ptr
#define D dword ptr
#define W word ptr
#define B byte ptr
#if (defined USE_PORTABLE_C)
#define ASMOPT 0
#elif (defined __MSVC_INLINE__)
#define ASMOPT 1
#elif (defined __GNU_INLINE__)
#define ASMOPT 1
#else
#define ASMOPT 0
#endif
// 64-bit work variables. mmBaseWidthMask and mmBaseHeightMask are filled in by
// RenderWater(); the others are not written in the part of the file reviewed here.
// NOTE(review): the "mm" prefix and the inline-asm comment below suggest these
// are operands for MMX assembly -- confirm against the asm sections.
static __int64 mmBaseWidthShift=0;
static __int64 mmBaseWidth=0;
static __int64 mmBaseWidthMask=0;
static __int64 mmBaseHeightMask=0;
static __int64 mmBaseMasks=0;
static __int64 mmShift=0;
#if (defined __GNUC__)
/*
* If these are "const" vars, they get optimized to hardcoded values when gcc
* builds with optimization, which means the linker can't resolve the
* references to them in the inline ASM. That's obnoxious.
*/
// 64-bit bit-pattern constants (kept non-const on GCC for the reason above).
static __int64 mm1LO = 0x0000000000000001ll;
static __int64 mm1HI = 0x0000000100000000ll;
static __int64 mm1HILO = 0x0000000100000001ll;
static __int64 mm0001 = 0x0000000000000001ll;
static __int64 mm0010 = 0x0000000000010000ll;
static __int64 mm00M0 = 0x00000000FFFF0000ll;
// Sink for the asm_force_*() helpers below: each helper stores the address of
// exactly one of the variables above into this pointer. Going by the comment
// above, the purpose is to keep those symbols alive for the inline assembly.
static void *force_syms_to_exist = NULL;
void asm_force_mm1LO() { force_syms_to_exist = &mm1LO; }
void asm_force_mm1HI() { force_syms_to_exist = &mm1HI; }
void asm_force_mm1HILO() { force_syms_to_exist = &mm1HILO; }
void asm_force_mm0001() { force_syms_to_exist = &mm0001; }
void asm_force_mm0010() { force_syms_to_exist = &mm0010; }
void asm_force_mm00M0() { force_syms_to_exist = &mm00M0; }
void asm_force_mmBaseWidthShift() { force_syms_to_exist = &mmBaseWidthShift; }
void asm_force_mmBaseWidth() { force_syms_to_exist = &mmBaseWidth; }
void asm_force_mmBaseWidthMask() { force_syms_to_exist = &mmBaseWidthMask; }
void asm_force_mmBaseHeightMask() { force_syms_to_exist = &mmBaseHeightMask; }
void asm_force_mmBaseMasks() { force_syms_to_exist = &mmBaseMasks; }
void asm_force_mmShift() { force_syms_to_exist = &mmShift; }
#else
// Non-GCC builds: the same bit-pattern constants, declared const.
static const __int64 mm1LO = 0x0000000000000001;
static const __int64 mm1HI = 0x0000000100000000;
static const __int64 mm1HILO = 0x0000000100000001;
static const __int64 mm0001 = 0x0000000000000001;
static const __int64 mm0010 = 0x0000000000010000;
static const __int64 mm00M0 = 0x00000000FFFF0000;
#endif
// speed table
// NOTE(review): not referenced in the part of the file reviewed here; the names
// suggest a lazily filled ((value mod 3) - 1) lookup -- confirm at the fill site.
static SBYTE asbMod3Sub1Table[256];
static BOOL bTableSet = FALSE;
// effect texture currently being processed (its td_pubBuffer1/2 are the working
// buffers) and the base texture whose frames RenderWater() reads
static CTextureData *_ptdEffect, *_ptdBase;
// NOTE(review): presumably the dimensions of the effect texture -- not used in
// the code reviewed here; confirm.
static PIX _pixTexWidth, _pixTexHeight;
// dimensions of the working buffer; the code masks coordinates with (dim-1),
// so it assumes both are powers of two
static PIX _pixBufferWidth, _pixBufferHeight;
// mask ANDed onto the linear pixel index by the PutPixel*() helpers
static ULONG _ulBufferMask;
// mip level of the base texture used by RenderWater()
static INDEX _iWantedMipLevel;
// current draw targets: byte buffer for fire effects, signed-word buffer for water
static UBYTE *_pubDrawBuffer;
static SWORD *_pswDrawBuffer;
// randomizer
// state of the generator driven by Randomize()/Rnd()
static ULONG ulRNDSeed;
// Seeds the pseudo-random generator. A zero seed is replaced by a fixed
// non-zero constant, because a zero state would stay zero under the
// multiplication used by Rnd().
inline void Randomize( ULONG ulSeed)
{
  if( ulSeed==0) {
    ulSeed = 0x87654321;
  }
  ulRNDSeed = ulSeed;
  ulRNDSeed *= 262147;
}
// Steps the generator (state *= 262147, with unsigned wrap-around) and
// returns the new state.
inline ULONG Rnd(void)
{
  return ulRNDSeed *= 262147;
}
#define RNDW (Rnd()>>16)
// Initialize the texture effect source.
// Stores the owning global effect and the source type, then calls the
// type-specific initializer with the two control points (pixU0,pixV0) and
// (pixU1,pixV1).
void CTextureEffectSource::Initialize( class CTextureEffectGlobal *ptegGlobalEffect,
                                       ULONG ulEffectSourceType, PIX pixU0, PIX pixV0,
                                       PIX pixU1, PIX pixV1)
{
  // remember global effect for cross linking, and which kind of source this is
  tes_ulEffectSourceType = ulEffectSourceType;
  tes_ptegGlobalEffect   = ptegGlobalEffect;

  // source-type table that belongs to the effect class of the global effect
  struct TextureEffectSourceType *patestTypes =
    _ategtTextureEffectGlobalPresets[ ptegGlobalEffect->teg_ulEffectType].tet_atestEffectSourceTypes;
  struct TextureEffectSourceType &testThis = patestTypes[ulEffectSourceType];

  // let the type-specific callback prepare the source for animating
  testThis.test_Initialize(this, pixU0, pixV0, pixU1, pixV1);
}
// Animate the texture effect source.
void CTextureEffectSource::Animate(void)
{
// obtain effect source table for current effect class
struct TextureEffectSourceType *patestSourceEffectTypes =
_ategtTextureEffectGlobalPresets[ tes_ptegGlobalEffect->teg_ulEffectType]
.tet_atestEffectSourceTypes;
// animating it
patestSourceEffectTypes[tes_ulEffectSourceType].test_Animate(this);
}
// ----------------------------------------
// SLONG WATER
// ----------------------------------------
// Adds iHeight to the water height-map cell at (pixU,pixV). The linear index
// is ANDed with _ulBufferMask, so out-of-range coordinates wrap.
inline void PutPixelSLONG_WATER( PIX pixU, PIX pixV, INDEX iHeight)
{
  const PIX pixLinear = pixV*_pixBufferWidth + pixU;
  _pswDrawBuffer[pixLinear & _ulBufferMask] += iHeight;
}
// Adds a 3x3 splash centred on (pixU,pixV) to the water height map: the full
// height in the middle, a reduced height on the four edge neighbours and a
// further reduced height on the four diagonal neighbours.
inline void PutPixel9SLONG_WATER( PIX pixU, PIX pixV, INDEX iHeightMid)
{
  // 16.16 fixed-point weights
  const INDEX iHeightSide = (iHeightMid*28053) >>16; // iHeight /0.851120 *0.364326;
  const INDEX iHeightDiag = (iHeightMid*12008) >>16; // iHeight /0.851120 *0.155951;

  // walk the neighbourhood row by row, left to right
  for( PIX pixDV=-1; pixDV<=1; pixDV++) {
    for( PIX pixDU=-1; pixDU<=1; pixDU++) {
      INDEX iHeight;
      if( pixDU!=0 && pixDV!=0) {
        iHeight = iHeightDiag;   // corner of the 3x3 square
      } else if( pixDU!=0 || pixDV!=0) {
        iHeight = iHeightSide;   // edge neighbour
      } else {
        iHeight = iHeightMid;    // centre
      }
      PutPixelSLONG_WATER( pixU+pixDU, pixV+pixDV, iHeight);
    }
  }
}
// ----------------------------------------
// UBYTE FIRE
// ----------------------------------------
// Adds iHeight to the fire buffer cell at (pixU,pixV), clamping the result to
// 0..255. The linear index is ANDed with _ulBufferMask, so coordinates wrap.
inline void PutPixelUBYTE_FIRE( PIX pixU, PIX pixV, INDEX iHeight)
{
  const PIX pixCell = (pixV*_pixBufferWidth+pixU) & _ulBufferMask;
  UBYTE &ubHeat = _pubDrawBuffer[pixCell];
  ubHeat = Clamp( ubHeat +iHeight, 0L, 255L);
}
// Adds a 3x3 hot spot centred on (pixU,pixV) to the fire buffer, using the
// same centre/edge/diagonal weighting as the water splash.
inline void PutPixel9UBYTE_FIRE( PIX pixU, PIX pixV, INDEX iHeightMid)
{
  // 16.16 fixed-point weights
  const INDEX iHeightSide = (iHeightMid*28053) >>16; // iHeight /0.851120 *0.364326;
  const INDEX iHeightDiag = (iHeightMid*12008) >>16; // iHeight /0.851120 *0.155951;

  // 3x3 kernel, rows top to bottom, columns left to right
  const INDEX aiKernel[3][3] = {
    { iHeightDiag, iHeightSide, iHeightDiag },
    { iHeightSide, iHeightMid,  iHeightSide },
    { iHeightDiag, iHeightSide, iHeightDiag },
  };
  for( INDEX iRow=0; iRow<3; iRow++) {
    for( INDEX iCol=0; iCol<3; iCol++) {
      PutPixelUBYTE_FIRE( pixU+(iCol-1), pixV+(iRow-1), aiKernel[iRow][iCol]);
    }
  }
}
// Adds a 5x5 hot spot centred on (pixU,pixV) to the fire buffer. The centre
// plus-shaped group of five cells gets the full height, the four outer corners
// get the diagonal weight, and every other cell gets the side weight.
inline void PutPixel25UBYTE_FIRE( PIX pixU, PIX pixV, INDEX iHeightMid)
{
  // 16.16 fixed-point weights
  const INDEX iHeightSide = (iHeightMid*28053) >>16; // iHeight /0.851120 *0.364326;
  const INDEX iHeightDiag = (iHeightMid*12008) >>16; // iHeight /0.851120 *0.155951;

  // selectable heights: 0 = middle, 1 = side, 2 = diagonal
  const INDEX aiHeights[3] = { iHeightMid, iHeightSide, iHeightDiag };

  // { offset U, offset V, height selector }, listed in drawing order
  static const int aiTaps[25][3] = {
    {-2,-2,2}, {-1,-2,1}, { 0,-2,1}, {+1,-2,1}, {+2,-2,2},
    {-2,-1,1}, {-1,-1,1}, { 0,-1,0}, {+1,-1,1}, {+2,-1,1},
    {-2, 0,1}, {-1, 0,0}, { 0, 0,0}, {+1, 0,0}, {+2, 0,1},
    {-2,+1,1}, {-1,+1,1}, { 0,+1,0}, {+1,+1,1}, {+2,+1,1},
    {+2,+2,2}, {-1,+2,1}, { 0,+2,1}, {+1,+2,1}, {-2,+2,2},
  };
  for( int iTap=0; iTap<25; iTap++) {
    PutPixelUBYTE_FIRE( pixU+aiTaps[iTap][0], pixV+aiTaps[iTap][1],
                        aiHeights[aiTaps[iTap][2]]);
  }
}
/////////////////////////////////////////////////////////////////////
// WATER EFFECTS
/////////////////////////////////////////////////////////////////////
// WARNING: Changing this value will BREAK the inline asm on
// GNU-based platforms (Linux, etc.) YOU HAVE BEEN WARNED.
#define DISTORTION 3 //3
///////////////// random surfer
// State of one "random surfer" water source. Overlaid on the raw
// tesp_achDummy property bytes of a CTextureEffectSource, so the layout must
// not change.
struct Surfer {
// current position in the water buffer (fractional)
FLOAT fU;
FLOAT fV;
// current heading; fed to sin()/cos() by AnimateRandomSurfer()
FLOAT fAngle;
};
// Places the surfer at (pixU0,pixV0) and gives it a random initial heading
// in the range 0..7. The second control point is not used.
void InitializeRandomSurfer(CTextureEffectSource *ptes,
                            PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // surfer state lives in the source's raw property bytes
  Surfer *psf = (Surfer *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  psf->fU = pixU0;
  psf->fV = pixV0;
  psf->fAngle = RNDW&7;
}
// One animation step of the surfer: splash at the current position, advance
// two units along the heading, splash harder at the new position, then
// randomly nudge the heading.
void AnimateRandomSurfer(CTextureEffectSource *ptes)
{
  // surfer state lives in the source's raw property bytes
  Surfer *psf = (Surfer *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // trailing splash at the old position
  PutPixel9SLONG_WATER((long) psf->fU, (long) psf->fV, 125);

  // move along the heading
  psf->fU += 2*sin(psf->fAngle);
  psf->fV += 2*cos(psf->fAngle);

  // leading splash at the new position
  PutPixel9SLONG_WATER((long) psf->fU, (long) psf->fV, 250);

  // two independent 1-in-16 chances: turn one way, and/or turn the other way
  const BOOL bTurnPlus = ((RNDW&15)==0);
  if( bTurnPlus) {
    psf->fAngle += 3.14f/7.0f;
  }
  const BOOL bTurnMinus = ((RNDW&15)==0);
  if( bTurnMinus) {
    psf->fAngle -= 3.14f/5.0f;
  }
}
///////////////// raindrops
// State of one raindrop. Five of these are stored back to back in the raw
// tesp_achDummy property bytes of a CTextureEffectSource, so the layout must
// not change.
struct Raindrop {
// impact position in the water buffer
UBYTE pixU;
UBYTE pixV;
// strength of the drop (random value masked by the per-variant height mask)
SWORD iHeight;
// frame counter: the drop draws while below 8 and is respawned at 48
SWORD iIndex;
};
// Creates five raindrops at random buffer positions with random strength
// (masked by iHeight) and staggers their frame counters by 8 so they do not
// all hit at once. The control points are not used.
void InitializeRaindrops(CTextureEffectSource *ptes,
                         PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1, int iHeight)
{
  // the drops are stored back to back in the source's raw property bytes
  Raindrop *ardDrops = (Raindrop *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  for( int iDrop=0; iDrop<5; iDrop++) {
    Raindrop *prd = &ardDrops[iDrop];
    prd->pixU    = RNDW&(_pixBufferWidth -1);
    prd->pixV    = RNDW&(_pixBufferHeight-1);
    prd->iHeight = RNDW&iHeight;
    prd->iIndex  = iDrop*8;
  }
}
// Raindrop initializer using the standard height mask (255).
void InitializeRaindropsStandard(CTextureEffectSource *ptes,
                                 PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  const int iHeightMask = 255;
  InitializeRaindrops(ptes, pixU0, pixV0, pixU1, pixV1, iHeightMask);
}
// Raindrop initializer using the big height mask (1023).
void InitializeRaindropsBig(CTextureEffectSource *ptes,
                            PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  const int iHeightMask = 1023;
  InitializeRaindrops(ptes, pixU0, pixV0, pixU1, pixV1, iHeightMask);
}
// Raindrop initializer using the small height mask (31).
void InitializeRaindropsSmall(CTextureEffectSource *ptes,
                              PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  const int iHeightMask = 31;
  InitializeRaindrops(ptes, pixU0, pixV0, pixU1, pixV1, iHeightMask);
}
// Advances the five raindrops of this source by one frame.
// While a drop's frame counter is below 8 it disturbs the water with a 3x3
// splash whose height follows a sine over the counter, scaled by the drop's
// strength. When the counter reaches 48 the drop is respawned at a random
// position with a new random strength (masked by iHeight).
void AnimateRaindrops(CTextureEffectSource *ptes, int iHeight)
{
  for (int iIndex=0; iIndex<5; iIndex++) {
    Raindrop &rd =
      ((Raindrop&) ptes->tes_tespEffectSourceProperties.tesp_achDummy[iIndex*sizeof(Raindrop)]);
    if (rd.iIndex < 48) {
      rd.iIndex++;
      if (rd.iIndex < 8) {
        // The product must be formed BEFORE the integer conversion: a cast
        // binds tighter than '*', so "(long) sin(x)*h" truncates the sine
        // (|sin| <= 1) to 0 first and the splash height is lost.
        PutPixel9SLONG_WATER(rd.pixU, rd.pixV,
                             (long) (sin(rd.iIndex/4.0f*(-3.14f))*rd.iHeight));
      }
    } else {
      // respawn the drop somewhere else
      rd.pixU = RNDW&(_pixBufferWidth -1);
      rd.pixV = RNDW&(_pixBufferHeight-1);
      rd.iHeight = RNDW&iHeight;
      rd.iIndex = 0;
    }
  }
}
// Raindrop animation using the standard height mask (255).
void AnimateRaindropsStandard(CTextureEffectSource *ptes)
{
  const int iHeightMask = 255;
  AnimateRaindrops(ptes, iHeightMask);
}
// Raindrop animation using the big height mask (1023).
void AnimateRaindropsBig(CTextureEffectSource *ptes)
{
  const int iHeightMask = 1023;
  AnimateRaindrops(ptes, iHeightMask);
}
// Raindrop animation using the small height mask (31).
void AnimateRaindropsSmall(CTextureEffectSource *ptes)
{
  const int iHeightMask = 31;
  AnimateRaindrops(ptes, iHeightMask);
}
///////////////// oscilator
// State of a fixed-position oscillating water source. Overlaid on the raw
// tesp_achDummy property bytes of a CTextureEffectSource, so the layout must
// not change.
struct Oscilator {
// position in the water buffer
UBYTE pixU;
UBYTE pixV;
// current phase; starts at -3.14 and advances by 3.14/6 per frame
FLOAT fAngle;
};
// Places the oscillator at (pixU0,pixV0) with its phase at -3.14.
// The second control point is not used.
void InitializeOscilator(CTextureEffectSource *ptes,
                         PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // oscillator state lives in the source's raw property bytes
  Oscilator *pos = (Oscilator *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pos->fAngle = -3.14f;
  pos->pixU = pixU0;
  pos->pixV = pixV0;
}
// One animation step of the oscillator: disturbs the water at its position
// with a 3x3 splash of height sin(phase)*150, then advances the phase.
void AnimateOscilator(CTextureEffectSource *ptes)
{
  Oscilator &os =
    (*((Oscilator *) ptes->tes_tespEffectSourceProperties.tesp_achDummy));
  // The product must be formed BEFORE the integer conversion: a cast binds
  // tighter than '*', so "(long) sin(a)*150" truncates the sine (|sin| <= 1)
  // to 0 first and the oscillator never moves the water.
  PutPixel9SLONG_WATER(os.pixU, os.pixV, (long) (sin(os.fAngle)*150));
  os.fAngle += (3.14f/6);
}
///////////////// Vertical Line
// State of a vertical oscillating line in the water buffer. Overlaid on the
// raw tesp_achDummy property bytes of a CTextureEffectSource, so the layout
// must not change.
struct VertLine{
// start of the line
UBYTE pixU;
UBYTE pixV;
// length of the line in pixels (runs in +V direction, wrapping at the buffer height)
UWORD uwSize;
// current phase; starts at -3.14 and advances by 3.14/6 per frame
FLOAT fAngle;
};
// Sets up a vertical line starting at (pixU0,pixV0). Its length is the
// vertical distance between the two control points, or 16 when both have the
// same V coordinate.
void InitializeVertLine(CTextureEffectSource *ptes,
                        PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // line state lives in the source's raw property bytes
  VertLine *pvl = (VertLine *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pvl->pixU = pixU0;
  pvl->pixV = pixV0;
  pvl->fAngle = -3.14f;
  if (pixV0!=pixV1) {
    pvl->uwSize = abs(pixV1-pixV0);
  } else {
    // degenerate control points: use the default length
    pvl->uwSize = 16;
  }
}
// One animation step of the vertical line: adds sin(phase)*25 to every water
// cell of the line (wrapping at the buffer height), then advances the phase.
void AnimateVertLine(CTextureEffectSource *ptes)
{
  // line state lives in the source's raw property bytes
  VertLine *pvl = (VertLine *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  PIX pixRow = pvl->pixV;
  int iDone = 0;
  while (iDone<pvl->uwSize) {
    PutPixelSLONG_WATER(pvl->pixU, pixRow, (long) (sin(pvl->fAngle)*25));
    // step down one row, wrapping around the buffer
    pixRow = (pixRow+1)&(_pixBufferHeight-1);
    iDone++;
  }
  pvl->fAngle += (3.14f/6);
}
///////////////// Horizontal Line
// State of a horizontal oscillating line in the water buffer. Overlaid on the
// raw tesp_achDummy property bytes of a CTextureEffectSource, so the layout
// must not change.
struct HortLine{
// start of the line
UBYTE pixU;
UBYTE pixV;
// length of the line in pixels (runs in +U direction, wrapping at the buffer width)
UWORD uwSize;
// current phase; starts at -3.14 and advances by 3.14/6 per frame
FLOAT fAngle;
};
// Sets up a horizontal line starting at (pixU0,pixV0). Its length is the
// horizontal distance between the two control points, or 16 when both have
// the same U coordinate.
void InitializeHortLine(CTextureEffectSource *ptes,
                        PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // line state lives in the source's raw property bytes
  HortLine *phl = (HortLine *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  phl->pixU = pixU0;
  phl->pixV = pixV0;
  phl->fAngle = -3.14f;
  if (pixU0!=pixU1) {
    phl->uwSize = abs(pixU1-pixU0);
  } else {
    // degenerate control points: use the default length
    phl->uwSize = 16;
  }
}
// One animation step of the horizontal line: adds sin(phase)*25 to every
// water cell of the line (wrapping at the buffer width), then advances the
// phase.
void AnimateHortLine(CTextureEffectSource *ptes)
{
  // line state lives in the source's raw property bytes
  HortLine *phl = (HortLine *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  PIX pixColumn = phl->pixU;
  int iDone = 0;
  while (iDone<phl->uwSize) {
    PutPixelSLONG_WATER(pixColumn, phl->pixV, (long) (sin(phl->fAngle)*25));
    // step right one column, wrapping around the buffer
    pixColumn = (pixColumn+1)&(_pixBufferWidth-1);
    iDone++;
  }
  phl->fAngle += (3.14f/6);
}
/////////////////////////////////////////////////////////////////////
// FIRE EFFECTS
/////////////////////////////////////////////////////////////////////
///////////////// Fire Point
// Position of a point-like fire source (shared by the plain, random and
// shaking fire points). Overlaid on the raw tesp_achDummy property bytes of a
// CTextureEffectSource, so the layout must not change.
struct FirePoint{
UBYTE pixU;
UBYTE pixV;
};
// Places a constant fire point at (pixU0,pixV0). The second control point is
// not used.
void InitializeFirePoint(CTextureEffectSource *ptes,
                         PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // point state lives in the source's raw property bytes
  FirePoint *pfp = (FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pfp->pixU = pixU0;
  pfp->pixV = pixV0;
}
// Feeds the fire buffer with a full-strength (255) 3x3 hot spot at the point.
void AnimateFirePoint(CTextureEffectSource *ptes)
{
  // point state lives in the source's raw property bytes
  const FirePoint *pfp = (FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  PutPixel9UBYTE_FIRE(pfp->pixU, pfp->pixV, 255);
}
// Places a random-strength fire point at (pixU0,pixV0). The second control
// point is not used.
void InitializeRandomFirePoint(CTextureEffectSource *ptes,
                               PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // point state lives in the source's raw property bytes
  FirePoint *pfp = (FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pfp->pixU = pixU0;
  pfp->pixV = pixV0;
}
// Feeds the fire buffer with a 3x3 hot spot of random strength (0..255) at
// the point.
void AnimateRandomFirePoint(CTextureEffectSource *ptes)
{
  // point state lives in the source's raw property bytes
  const FirePoint *pfp = (FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  PutPixel9UBYTE_FIRE(pfp->pixU, pfp->pixV, RNDW&255);
}
// Places a shaking fire point at (pixU0,pixV0). The second control point is
// not used.
void InitializeFireShakePoint(CTextureEffectSource *ptes,
                              PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // point state lives in the source's raw property bytes
  FirePoint *pfp = (FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pfp->pixU = pixU0;
  pfp->pixV = pixV0;
}
// Feeds the fire buffer with a full-strength 3x3 hot spot at the point,
// displaced by a random offset computed as (random mod 3) - 1 on each axis.
void AnimateFireShakePoint(CTextureEffectSource *ptes)
{
  // point state lives in the source's raw property bytes
  const FirePoint *pfp = (FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // NOTE(review): the offsets are stored in UBYTE, so (presumably) -1 becomes
  // 255 and the "one to the left/up" case relies on the buffer mask to wrap
  // back; for buffers narrower than 256 this looks like it lands on another
  // row. Behaviour kept as is -- confirm whether this is intended.
  const UBYTE ubShakeU = RNDW%3 - 1;
  const UBYTE ubShakeV = RNDW%3 - 1;
  PutPixel9UBYTE_FIRE(pfp->pixU+ubShakeU, pfp->pixV+ubShakeV, 255);
}
///////////////// Fire Place
#define FIREPLACE_SIZE 60
// State of a fire place: a horizontal strip of flames. Overlaid on the raw
// tesp_achDummy property bytes of a CTextureEffectSource, so the layout must
// not change.
struct FirePlace{
// left end of the strip
UBYTE pixU;
UBYTE pixV;
// number of active flames; kept within 10..FIREPLACE_SIZE by InitializeFirePlace()
UBYTE ubWidth;
// per-column flame strength; only the first ubWidth entries are used
UBYTE aubFire[FIREPLACE_SIZE];
};
// Sets up a fire place starting at (pixU0,pixV0). Its width is the horizontal
// distance between the control points (stored in a byte first), limited to at
// most FIREPLACE_SIZE and at least 10 columns. All active flames start at 0.
void InitializeFirePlace(CTextureEffectSource *ptes,
                         PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // fire place state lives in the source's raw property bytes
  FirePlace *pfp = (FirePlace *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pfp->pixU = pixU0;
  pfp->pixV = pixV0;

  // width is truncated to a byte before it is limited
  UBYTE ubWidth = abs(pixU1-pixU0);
  if (ubWidth>FIREPLACE_SIZE) {
    ubWidth = FIREPLACE_SIZE;
  }
  if (ubWidth<10) {
    ubWidth = 10;
  }
  pfp->ubWidth = ubWidth;

  // clear fire array
  UBYTE *pubFlame = pfp->aubFire;
  for (int iLeft=ubWidth; iLeft>0; iLeft--) {
    *pubFlame++ = 0;
  }
}
// One animation step of the fire place:
//  1. randomly either ignite five neighbouring columns ("match") or put out
//     ten random columns ("water"),
//  2. feed every weak flame (below 50) with some random extra strength,
//  3. put out random columns within four cells of either end,
//  4. smooth the strip with a running 3-tap average,
//  5. draw each column as a 3x3 hot spot into the fire buffer.
void AnimateFirePlace(CTextureEffectSource *ptes)
{
  // fire place state lives in the source's raw property bytes
  FirePlace *pfp = (FirePlace *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  UBYTE *aubFire = pfp->aubFire;
  INDEX iCol;

  // 1. random event selector, 0..255
  const ULONG ulEvent = RNDW&255;
  if (ulEvent>200) {
    // match: light five columns in a row
    const ULONG ulFirst = ulEvent%(pfp->ubWidth-5);
    for (iCol=0; iCol<5; iCol++) {
      aubFire[ulFirst+iCol] = 255;
    }
  } else if (ulEvent<50) {
    // water: put out ten randomly chosen columns
    for (iCol=0; iCol<10; iCol++) {
      aubFire[RNDW%pfp->ubWidth] = 0;
    }
  }

  // 2. fix fire place: weak flames get fed
  for (iCol=0; iCol<pfp->ubWidth; iCol++) {
    UBYTE ubStrength = aubFire[iCol];
    if (ubStrength < 50) {
      if (ubStrength > 10) {
        // starting to burn
        ubStrength += RNDW%30; //30
      } else {
        // nearly out: give more fire
        ubStrength += RNDW%30+30; //30,30
      }
    }
    aubFire[iCol] = ubStrength;
  }

  // 3. water on edges
  for (iCol=0; iCol<4; iCol++) {
    const INDEX iFromEdge = RNDW%4;
    aubFire[iFromEdge] = 0;
    aubFire[pfp->ubWidth-1-iFromEdge] = 0;
  }

  // 4. smooth fire place (in place, so the left neighbour is already smoothed)
  for (iCol=1; iCol<(pfp->ubWidth-1); iCol++) {
    aubFire[iCol] = (aubFire[iCol-1]+aubFire[iCol]+aubFire[iCol+1])/3;
  }

  // 5. draw fire place in buffer
  for (iCol=0; iCol<pfp->ubWidth; iCol++) {
    PutPixel9UBYTE_FIRE(pfp->pixU+iCol, pfp->pixV, aubFire[iCol]);
  }
}
///////////////// Fire Roler
// State of a fire source that travels along an ellipse. Overlaid on the raw
// tesp_achDummy property bytes of a CTextureEffectSource, so the layout must
// not change.
struct FireRoler{
// centre of the ellipse
UBYTE pixU;
UBYTE pixV;
//FLOAT fRadius;
// radii along U and V (may be negative: they are signed control point differences)
FLOAT fRadiusU;
FLOAT fRadiusV;
// current angle and the increment applied after each drawn spot
FLOAT fAngle;
FLOAT fAngleAdd;
};
// Sets up a rolling fire source centred at (pixU0,pixV0). The radii are the
// signed differences to the second control point; the angular step is 3.14
// divided by the sum of their magnitudes. When both control points coincide,
// a radius of 3 and a step of 3.14/6 are used instead.
void InitializeFireRoler(CTextureEffectSource *ptes,
                         PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // roller state lives in the source's raw property bytes
  FireRoler *pfr = (FireRoler *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pfr->pixU = pixU0;
  pfr->pixV = pixV0;

  const BOOL bSinglePoint = (pixU0==pixU1 && pixV0==pixV1);
  if (bSinglePoint) {
    // defaults
    pfr->fRadiusU = 3;
    pfr->fRadiusV = 3;
    pfr->fAngleAdd = (3.14f/6);
  } else {
    pfr->fRadiusU = pixU1-pixU0;
    pfr->fRadiusV = pixV1-pixV0;
    pfr->fAngleAdd = (3.14f/(Abs(pfr->fRadiusU)+Abs(pfr->fRadiusV)));
  }

  // start at angle zero
  pfr->fAngle = 0;
}
// One animation step of the roller: draws three 3x3 hot spots of strength
// 255, 200 and 150 at three consecutive angles on the ellipse, advancing the
// angle after each one.
void AnimateFireRoler(CTextureEffectSource *ptes)
{
  // roller state lives in the source's raw property bytes
  FireRoler *pfr = (FireRoler *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // strength of the three spots, in drawing order
  static const INDEX aiStrength[3] = { 255, 200, 150 };
  for (int iSpot=0; iSpot<3; iSpot++) {
    PutPixel9UBYTE_FIRE((long) (cos(pfr->fAngle)*pfr->fRadiusU + pfr->pixU),
                        (long) (sin(pfr->fAngle)*pfr->fRadiusV + pfr->pixV),
                        aiStrength[iSpot]);
    pfr->fAngle += pfr->fAngleAdd;
  }
}
///////////////// Fire Fall
#define FIREFALL_POINTS 100
// State of a fire fall (falling sparks). Overlaid on the raw tesp_achDummy
// property bytes of a CTextureEffectSource, so the layout must not change.
struct FireFall{
// top-left origin of the fall
UBYTE pixU;
UBYTE pixV;
// horizontal spread; new sparks get a random U offset below this value
ULONG ulWidth;
// index of the only spark that may be re-randomized when it wraps; advances
// round-robin through all FIREFALL_POINTS sparks
ULONG ulPointToReinitialize;
};
// One falling spark. Accessed by reinterpreting an element of the source's
// tes_atepPixels array, so the layout must not change.
// NOTE(review): assumes an element of tes_atepPixels is at least this large -- confirm.
struct FireFallPixel{
// current position
UBYTE pixU;
UBYTE pixV;
// rows fallen per frame (2 or 3)
UBYTE ubSpeed;
};
// Sets up a fire fall with its origin at (pixU0,pixV0). The spread is the
// horizontal distance between the control points, or 15 when they share the
// same U. Allocates FIREFALL_POINTS sparks and scatters them randomly across
// the spread and the buffer height, each with a speed of 2 or 3.
void InitializeFireFall(CTextureEffectSource *ptes,
                        PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // fall state lives in the source's raw property bytes
  FireFall *pff = (FireFall *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pff->pixU = pixU0;
  pff->pixV = pixV0;
  if (pixU0!=pixU1) {
    pff->ulWidth = abs(pixU1-pixU0);
  } else {
    // degenerate control points: use the default spread
    pff->ulWidth = 15;
  }

  // initialize fall points
  ptes->tes_atepPixels.New(FIREFALL_POINTS);
  pff->ulPointToReinitialize = 0;
  for (INDEX iSpark=0; iSpark<FIREFALL_POINTS; iSpark++) {
    FireFallPixel &ffpSpark = ((FireFallPixel&) ptes->tes_atepPixels[iSpark]);
    ffpSpark.pixU    = pff->pixU+(RNDW%pff->ulWidth);
    ffpSpark.pixV    = pff->pixV+(RNDW%_pixBufferHeight);
    ffpSpark.ubSpeed = (RNDW&1)+2;
  }
}
// One animation step of the fire fall. Every spark draws two vertically
// adjacent fire pixels (slow sparks jitter sideways by -1..+1), then falls by
// its speed. A spark that leaves the bottom wraps to the top; only the spark
// whose index matches the round-robin counter also gets a new random column
// and speed.
void AnimateFireFall(CTextureEffectSource *ptes)
{
  // fall state lives in the source's raw property bytes
  FireFall *pff = (FireFall *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // animate fall points
  for (INDEX iSpark=0; iSpark<FIREFALL_POINTS; iSpark++) {
    FireFallPixel &ffpSpark = ((FireFallPixel&) ptes->tes_atepPixels[iSpark]);

    // random strength: 40, 104, 168 or 232
    const int iStrength = (RNDW&3)*64 + 40;
    if (ffpSpark.ubSpeed != 2) {
      // fast spark: straight down
      PutPixelUBYTE_FIRE(ffpSpark.pixU, ffpSpark.pixV,   iStrength);
      PutPixelUBYTE_FIRE(ffpSpark.pixU, ffpSpark.pixV+1, iStrength-40);
    } else {
      // slow spark: each of the two pixels gets its own sideways jitter
      PutPixelUBYTE_FIRE(ffpSpark.pixU+(RNDW%3)-1, ffpSpark.pixV,   iStrength);
      PutPixelUBYTE_FIRE(ffpSpark.pixU+(RNDW%3)-1, ffpSpark.pixV+1, iStrength-40);
    }

    // fall
    ffpSpark.pixV+=ffpSpark.ubSpeed;

    // still inside the buffer - nothing more to do for this spark
    if (ffpSpark.pixV < _pixBufferHeight) {
      continue;
    }

    // fell off the bottom: wrap to the top
    ffpSpark.pixV -= _pixBufferHeight;

    // only the spark whose turn it is gets re-randomized
    if (pff->ulPointToReinitialize == iSpark) {
      pff->ulPointToReinitialize++;
      if (pff->ulPointToReinitialize >= FIREFALL_POINTS) {
        pff->ulPointToReinitialize = 0;
      }
      ffpSpark.pixU    = pff->pixU+(RNDW%pff->ulWidth);
      ffpSpark.ubSpeed = (RNDW&1)+2;
    }
  }
}
///////////////// Fire Fountain
#define FIREFOUNTAIN_POINTS 100
// State of a fire fountain (sparks shot upwards that fall back). Overlaid on
// the raw tesp_achDummy property bytes of a CTextureEffectSource, so the
// layout must not change.
struct FireFountain{
// emitter position
UBYTE pixU;
UBYTE pixV;
// range of the random horizontal launch speed
ULONG ulWidth;
// vertical launch speed = ulBaseHeight + random value below ulRandomHeight
ULONG ulBaseHeight;
ULONG ulRandomHeight;
};
// One fountain spark. Accessed by reinterpreting every second element of the
// source's tes_atepPixels array, so the layout must not change.
// NOTE(review): the stride of 2 suggests one spark spans two array elements -- confirm.
struct FireFountainPixel{
// position in 1/64 pixel units (the code converts with <<6 and >>6)
SWORD pixU;
SWORD pixV;
// whole-pixel position of the previous frame (drawn as a dimmer trail)
UBYTE pixLastU;
UBYTE pixLastV;
// velocity in 1/64 pixel units per frame; sbSpeedV is subtracted from pixV
// and decreases by 8 every frame
SWORD sbSpeedU;
SWORD sbSpeedV;
};
// Sets up a fire fountain emitting from (pixU0,pixV0). Horizontal spread and
// launch height are derived from the distance to the second control point,
// with defaults when the points share a coordinate. Allocates the sparks and
// parks them at random heights in the emitter column with zero velocity.
void InitializeFireFountain(CTextureEffectSource *ptes,
                            PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // fountain state lives in the source's raw property bytes
  FireFountain *pff = (FireFountain *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pff->pixU = pixU0;
  pff->pixV = pixV0;

  // fountain width
  if (pixU0!=pixU1) {
    pff->ulWidth = abs(pixU1-pixU0)*2;
  } else {
    pff->ulWidth = 31;
  }

  // fountain height
  if (pixV0!=pixV1) {
    pff->ulBaseHeight   = abs(pixV1-pixV0)*3;
    pff->ulRandomHeight = abs(pixV1-pixV0);
  } else {
    pff->ulBaseHeight   = 120;
    pff->ulRandomHeight = 40;
  }

  // initialize fountain points (each spark occupies every second array slot)
  ptes->tes_atepPixels.New(FIREFOUNTAIN_POINTS*2);
  for (INDEX iSlot=0; iSlot<FIREFOUNTAIN_POINTS*2; iSlot+=2) {
    FireFountainPixel &ffpSpark = ((FireFountainPixel&) ptes->tes_atepPixels[iSlot]);
    // positions are kept in 1/64 pixel units
    ffpSpark.pixU = (pff->pixU)<<6;
    ffpSpark.pixV = (RNDW%(_pixBufferHeight-(_pixBufferHeight>>3))+(_pixBufferHeight>>3))<<6;
    ffpSpark.pixLastU = (ffpSpark.pixU)>>6;
    ffpSpark.pixLastV = (ffpSpark.pixV)>>6;
    // at rest
    ffpSpark.sbSpeedU = 0;
    ffpSpark.sbSpeedV = 0;
  }
}
// One animation step of the fire fountain. Every spark is drawn at its
// current position (200) and its previous position (150), then moved by its
// velocity while the vertical speed is reduced by 8. A spark that comes
// within five rows of the bottom is relaunched from the emitter with a new
// random velocity.
void AnimateFireFountain(CTextureEffectSource *ptes)
{
  // fountain state lives in the source's raw property bytes
  FireFountain *pff = (FireFountain *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // animate fountain points (each spark occupies every second array slot)
  for (INDEX iSlot=0; iSlot<FIREFOUNTAIN_POINTS*2; iSlot+=2) {
    FireFountainPixel &ffpSpark = ((FireFountainPixel&) ptes->tes_atepPixels[iSlot]);

    // head and trail
    PutPixelUBYTE_FIRE((ffpSpark.pixU)>>6, (ffpSpark.pixV)>>6, 200);
    PutPixelUBYTE_FIRE(ffpSpark.pixLastU, ffpSpark.pixLastV, 150);

    // remember where the spark was, then move it
    ffpSpark.pixLastU = (ffpSpark.pixU)>>6;
    ffpSpark.pixLastV = (ffpSpark.pixV)>>6;
    ffpSpark.pixU+=ffpSpark.sbSpeedU;
    ffpSpark.pixV-=ffpSpark.sbSpeedV;
    ffpSpark.sbSpeedV-=8;

    // not yet at the bottom - keep flying
    if ((ffpSpark.pixV>>6) < (_pixBufferHeight-5)) {
      continue;
    }

    // fell down: relaunch from the emitter
    ffpSpark.pixU = (pff->pixU)<<6;
    ffpSpark.pixV = (pff->pixV)<<6;
    ffpSpark.pixLastU = (ffpSpark.pixU)>>6;
    ffpSpark.pixLastV = (ffpSpark.pixV)>>6;
    ffpSpark.sbSpeedU = (RNDW%pff->ulWidth)-(pff->ulWidth/2-1);
    ffpSpark.sbSpeedV = (RNDW%pff->ulRandomHeight)+pff->ulBaseHeight;
  }
}
///////////////// Fire Side Fountain
#define FIRESIDEFOUNTAIN_POINTS 100
// State of a sideways fire fountain. Overlaid on the raw tesp_achDummy
// property bytes of a CTextureEffectSource, so the layout must not change.
struct FireSideFountain{
// emitter position
UBYTE pixU;
UBYTE pixV;
// horizontal launch speed = ulBaseWidth + random value below ulRandomWidth
ULONG ulBaseWidth;
ULONG ulRandomWidth;
// non-zero: the launch speed is negated (sparks fly towards smaller U)
ULONG ulSide;
};
// One side-fountain spark. Accessed by reinterpreting every second element of
// the source's tes_atepPixels array, so the layout must not change.
// NOTE(review): the stride of 2 suggests one spark spans two array elements -- confirm.
struct FireSideFountainPixel{
// position in 1/64 pixel units (the code converts with <<6 and >>6)
SWORD pixU;
SWORD pixV;
// whole-pixel position of the previous frame (drawn as a dimmer trail)
UBYTE pixLastU;
UBYTE pixLastV;
// velocity in 1/64 pixel units per frame; sbSpeedV is subtracted from pixV
// and decreases by 8 every frame
SWORD sbSpeedU;
SWORD sbSpeedV;
};
// Sets up a sideways fire fountain emitting from (pixU0,pixV0). Launch speed
// and direction come from the horizontal offset of the second control point;
// when both share the same U, defaults are used and the sparks fly away from
// the nearer... side chosen by which half of the buffer the emitter is in.
// Allocates the sparks and parks them at random heights in the emitter column
// with zero velocity.
void InitializeFireSideFountain(CTextureEffectSource *ptes,
                                PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // fountain state lives in the source's raw property bytes
  FireSideFountain *pfsf =
    (FireSideFountain *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pfsf->pixU = pixU0;
  pfsf->pixV = pixV0;

  // fountain width
  if (pixU0!=pixU1) {
    pfsf->ulBaseWidth   = abs(pixU1-pixU0)*2;
    pfsf->ulRandomWidth = abs(pixU1-pixU0);
    // negate the speed when the second point lies to the left
    pfsf->ulSide = (pixU1<pixU0);
  } else {
    pfsf->ulBaseWidth   = 80;
    pfsf->ulRandomWidth = 40;
    // negate the speed when the emitter is in the right half of the buffer
    pfsf->ulSide = (pixU0>(_pixBufferWidth/2));
  }

  // initialize fountain points (each spark occupies every second array slot)
  ptes->tes_atepPixels.New(FIRESIDEFOUNTAIN_POINTS*2);
  for (INDEX iSlot=0; iSlot<FIRESIDEFOUNTAIN_POINTS*2; iSlot+=2) {
    FireSideFountainPixel &fsfpSpark =
      ((FireSideFountainPixel&) ptes->tes_atepPixels[iSlot]);
    // positions are kept in 1/64 pixel units
    fsfpSpark.pixU = (pfsf->pixU)<<6;
    fsfpSpark.pixV = (RNDW%(_pixBufferHeight-(_pixBufferHeight>>3))+(_pixBufferHeight>>3))<<6;
    fsfpSpark.pixLastU = (fsfpSpark.pixU)>>6;
    fsfpSpark.pixLastV = (fsfpSpark.pixV)>>6;
    // at rest
    fsfpSpark.sbSpeedU = 0;
    fsfpSpark.sbSpeedV = 0;
  }
}
// One animation step of the sideways fountain. Every spark is drawn at its
// current position (200) and its previous position (150), then moved by its
// velocity while the vertical speed is reduced by 8. A spark that comes
// within five rows of the bottom is relaunched from the emitter with a new
// random horizontal speed (negated when ulSide is set) and no vertical speed.
void AnimateFireSideFountain(CTextureEffectSource *ptes)
{
  // fountain state lives in the source's raw property bytes
  FireSideFountain *pfsf =
    (FireSideFountain *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // animate fountain points (each spark occupies every second array slot)
  for (INDEX iSlot=0; iSlot<FIRESIDEFOUNTAIN_POINTS*2; iSlot+=2) {
    FireSideFountainPixel &fsfpSpark =
      ((FireSideFountainPixel&) ptes->tes_atepPixels[iSlot]);

    // head and trail
    PutPixelUBYTE_FIRE((fsfpSpark.pixU)>>6, (fsfpSpark.pixV)>>6, 200);
    PutPixelUBYTE_FIRE(fsfpSpark.pixLastU, fsfpSpark.pixLastV, 150);

    // remember where the spark was, then move it
    fsfpSpark.pixLastU = (fsfpSpark.pixU)>>6;
    fsfpSpark.pixLastV = (fsfpSpark.pixV)>>6;
    fsfpSpark.pixU+=fsfpSpark.sbSpeedU;
    fsfpSpark.pixV-=fsfpSpark.sbSpeedV;
    fsfpSpark.sbSpeedV-=8;

    // not yet at the bottom - keep flying
    if ((fsfpSpark.pixV>>6) < (_pixBufferHeight-5)) {
      continue;
    }

    // fell down: relaunch from the emitter
    fsfpSpark.pixU = (pfsf->pixU)<<6;
    fsfpSpark.pixV = (pfsf->pixV)<<6;
    fsfpSpark.pixLastU = (fsfpSpark.pixU)>>6;
    fsfpSpark.pixLastV = (fsfpSpark.pixV)>>6;
    fsfpSpark.sbSpeedU = (RNDW%pfsf->ulRandomWidth)+pfsf->ulBaseWidth;
    if (pfsf->ulSide) {
      fsfpSpark.sbSpeedU = -fsfpSpark.sbSpeedU;
    }
    fsfpSpark.sbSpeedV = 0;
  }
}
///////////////// Fire Lightning
// State of a lightning bolt between two points. Overlaid on the raw
// tesp_achDummy property bytes of a CTextureEffectSource, so the layout must
// not change.
struct FireLightning{
// start point
FLOAT fpixUFrom;
FLOAT fpixVFrom;
// end point
FLOAT fpixUTo;
FLOAT fpixVTo;
// unit direction from start to end
FLOAT fvU;
FLOAT fvV;
// direction rotated by 90 degrees (-fvV, fvU); used for sideways jitter
FLOAT fvNormalU;
FLOAT fvNormalV;
// length of the bolt
FLOAT fDistance;
// frame countdown; the bolt is redrawn when it reaches 0 and then reset to 2
SLONG slCnt;
};
// Sets up a lightning bolt from (pixU0,pixV0) to (pixU1,pixV1). When both
// control points coincide, the end point is derived from the buffer size
// minus the start point instead. Precomputes the bolt length, its unit
// direction and the perpendicular direction, and arms the frame countdown.
void InitializeFireLightning(CTextureEffectSource *ptes,
                             PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // bolt state lives in the source's raw property bytes
  FireLightning *pfl =
    (FireLightning *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // start point
  pfl->fpixUFrom = (FLOAT) pixU0;
  pfl->fpixVFrom = (FLOAT) pixV0;

  // end point
  const BOOL bSinglePoint = (pixU0==pixU1 && pixV0==pixV1);
  if (bSinglePoint) {
    pfl->fpixUTo = Abs((FLOAT)_pixBufferWidth -pfl->fpixUFrom);
    pfl->fpixVTo = Abs((FLOAT)_pixBufferHeight-pfl->fpixVFrom);
  } else {
    pfl->fpixUTo = (FLOAT) pixU1;
    pfl->fpixVTo = (FLOAT) pixV1;
  }

  // length of the bolt
  pfl->fDistance = sqrt((pfl->fpixUTo-pfl->fpixUFrom)*(pfl->fpixUTo-pfl->fpixUFrom)+
                        (pfl->fpixVTo-pfl->fpixVFrom)*(pfl->fpixVTo-pfl->fpixVFrom));
  // unit vector along the bolt
  pfl->fvU = (pfl->fpixUTo-pfl->fpixUFrom)/pfl->fDistance;
  pfl->fvV = (pfl->fpixVTo-pfl->fpixVFrom)/pfl->fDistance;
  // normal vector
  pfl->fvNormalU = -pfl->fvV;
  pfl->fvNormalV =  pfl->fvU;

  // frame counter
  pfl->slCnt = 2;
}
// One animation step of the lightning bolt. The bolt is only redrawn when the
// frame countdown reaches zero. It is built as a chain of straight segments:
// each node lies 5..9 units further along the bolt and is pushed sideways by
// a random -5..+5 along the normal; the final node is the exact end point.
// Every segment is rasterized with full-strength (255) fire pixels.
void AnimateFireLightning(CTextureEffectSource *ptes)
{
  FireLightning &fl =
    (*((FireLightning *) ptes->tes_tespEffectSourceProperties.tesp_achDummy));

  // wait until the countdown expires
  fl.slCnt--;
  if (fl.slCnt != 0) {
    return;
  }

  // pen starts at the source point
  FLOAT fPenU = fl.fpixUFrom;
  FLOAT fPenV = fl.fpixVFrom;

  ULONG ulTravelled = 0;
  while ((FLOAT)ulTravelled<fl.fDistance) {
    // go away from source point to destination point
    ulTravelled += (RNDW%5)+5;

    FLOAT fNodeU, fNodeV;
    if ((FLOAT)ulTravelled>=fl.fDistance) {
      // reached (or passed) the end: snap to the destination
      fNodeU = fl.fpixUTo;
      fNodeV = fl.fpixVTo;
    } else {
      // point on the ideal line ...
      fNodeU = fl.fpixUFrom + fl.fvU*(FLOAT)ulTravelled;
      fNodeV = fl.fpixVFrom + fl.fvV*(FLOAT)ulTravelled;
      // ... displaced along the normal
      const SLONG slJitter = (SLONG) (RNDW%11)-5;
      fNodeU += fl.fvNormalU*(FLOAT)slJitter;
      fNodeV += fl.fvNormalV*(FLOAT)slJitter;
    }

    // draw line from the pen to the node; step count is the larger axis distance
    FLOAT fStepU = fNodeU-fPenU;
    FLOAT fStepV = fNodeV-fPenV;
    FLOAT fSteps;
    if (Abs(fStepU)>Abs(fStepV)) {
      fSteps = Abs(fStepU);
    } else {
      fSteps = Abs(fStepV);
    }
    fStepU = fStepU/fSteps;
    fStepV = fStepV/fSteps;
    for (; fSteps>0.0f; fSteps -= 1) {
      PutPixelUBYTE_FIRE((PIX) fPenU, (PIX) fPenV, 255);
      fPenU += fStepU;
      fPenV += fStepV;
    }

    // continue from the exact node position
    fPenU = fNodeU;
    fPenV = fNodeV;
  }

  // re-arm the countdown
  fl.slCnt = 2;
}
///////////////// Fire Lightning Ball
#define FIREBALL_LIGHTNINGS 2
// State of a lightning ball: bolts shot from a centre towards random points
// on an ellipse. Overlaid on the raw tesp_achDummy property bytes of a
// CTextureEffectSource, so the layout must not change.
struct FireLightningBall{
// centre of the ball
FLOAT fpixU;
FLOAT fpixV;
// radii along U and V (signed control point differences, or 20 by default)
FLOAT fRadiusU;
FLOAT fRadiusV;
};
// Sets up a lightning ball centred at (pixU0,pixV0). The radii are the signed
// differences to the second control point, or 20 when both points coincide.
void InitializeFireLightningBall(CTextureEffectSource *ptes,
                                 PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // ball state lives in the source's raw property bytes
  FireLightningBall *pflb =
    (FireLightningBall *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pflb->fpixU = (FLOAT) pixU0;
  pflb->fpixV = (FLOAT) pixV0;

  const BOOL bSinglePoint = (pixU0==pixU1 && pixV0==pixV1);
  if (bSinglePoint) {
    // defaults
    pflb->fRadiusU = 20;
    pflb->fRadiusV = 20;
  } else {
    pflb->fRadiusU = pixU1-pixU0;
    pflb->fRadiusV = pixV1-pixV0;
  }
}
// One animation step of the lightning ball: draws FIREBALL_LIGHTNINGS bolts
// from the centre to random points on the ellipse. Each bolt is a chain of
// straight segments whose nodes lie 5..9 units further along the bolt and are
// pushed sideways by a random -5..+5 along the normal; the final node is the
// exact destination. Segments are rasterized with full-strength fire pixels.
void AnimateFireLightningBall(CTextureEffectSource *ptes)
{
  FireLightningBall &flb =
    (*((FireLightningBall *) ptes->tes_tespEffectSourceProperties.tesp_achDummy));

  for (int iBolt=0; iBolt<FIREBALL_LIGHTNINGS; iBolt++) {
    // pen starts at the centre
    FLOAT fPenU = flb.fpixU;
    FLOAT fPenV = flb.fpixV;

    // destination point: random angle on the ellipse
    const FLOAT fAngle = (FLOAT) RNDW/10000;
    const FLOAT fDestU = flb.fpixU + flb.fRadiusU*cos(fAngle);
    const FLOAT fDestV = flb.fpixV + flb.fRadiusV*sin(fAngle);
    const FLOAT fDistance = sqrt((fDestU-fPenU)*(fDestU-fPenU)+
                                 (fDestV-fPenV)*(fDestV-fPenV));
    // unit vector towards the destination
    const FLOAT fvU = (fDestU-fPenU)/fDistance;
    const FLOAT fvV = (fDestV-fPenV)/fDistance;
    // normal vector
    const FLOAT fvNormalU = -fvV;
    const FLOAT fvNormalV = fvU;

    ULONG ulTravelled = 0;
    while ((FLOAT)ulTravelled<fDistance) {
      // go away from source point to destination point
      ulTravelled += (RNDW%5)+5;

      FLOAT fNodeU, fNodeV;
      if ((FLOAT)ulTravelled>=fDistance) {
        // reached (or passed) the end: snap to the destination
        fNodeU = fDestU;
        fNodeV = fDestV;
      } else {
        // point on the ideal line ...
        fNodeU = flb.fpixU + fvU*(FLOAT)ulTravelled;
        fNodeV = flb.fpixV + fvV*(FLOAT)ulTravelled;
        // ... displaced along the normal
        const SLONG slJitter = (SLONG) (RNDW%11)-5;
        fNodeU += fvNormalU*(FLOAT)slJitter;
        fNodeV += fvNormalV*(FLOAT)slJitter;
      }

      // draw line from the pen to the node; step count is the larger axis distance
      FLOAT fStepU = fNodeU-fPenU;
      FLOAT fStepV = fNodeV-fPenV;
      FLOAT fSteps;
      if (Abs(fStepU)>Abs(fStepV)) {
        fSteps = Abs(fStepU);
      } else {
        fSteps = Abs(fStepV);
      }
      fStepU = fStepU/fSteps;
      fStepV = fStepV/fSteps;
      for (; fSteps>0.0f; fSteps -= 1) {
        PutPixelUBYTE_FIRE((PIX) fPenU, (PIX) fPenV, 255);
        fPenU += fStepU;
        fPenV += fStepV;
      }

      // continue from the exact node position
      fPenU = fNodeU;
      fPenV = fNodeV;
    }
  }
}
///////////////// Fire Smoke
#define SMOKE_POINTS 50
// State of a smoke source. Overlaid on the raw tesp_achDummy property bytes
// of a CTextureEffectSource, so the layout must not change.
struct FireSmoke{
// emitter position; fpixV is also the row smoke points are reset to
FLOAT fpixU;
FLOAT fpixV;
};
// One smoke particle. Accessed by reinterpreting every second element of the
// source's tes_atepPixels array, so the layout must not change.
// NOTE(review): the stride of 2 suggests one particle spans two array elements -- confirm.
struct FireSmokePoint{
// current position
FLOAT fpixU;
FLOAT fpixV;
// upward speed (subtracted from fpixV each frame); 0 while resting
FLOAT fSpeedV;
};
// Sets up a smoke source at (pixU0,pixV0) and allocates its particles. The
// particles start at rest on the emitter row, spread horizontally around the
// emitter column. The second control point is not used.
void InitializeFireSmoke(CTextureEffectSource *ptes,
                         PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  // smoke state lives in the source's raw property bytes
  FireSmoke *pfs = (FireSmoke *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pfs->fpixU = (FLOAT) pixU0;
  pfs->fpixV = (FLOAT) pixV0;

  // initialize smoke points (each particle occupies every second array slot)
  ptes->tes_atepPixels.New(SMOKE_POINTS*2);
  for (INDEX iSlot=0; iSlot<SMOKE_POINTS*2; iSlot+=2) {
    FireSmokePoint &fspParticle = ((FireSmokePoint&) ptes->tes_atepPixels[iSlot]);
    // integer offset around the emitter column
    fspParticle.fpixU   = FLOAT (pixU0 + (iSlot-(SMOKE_POINTS))/8);
    fspParticle.fpixV   = FLOAT (pixV0);
    fspParticle.fSpeedV = 0.0f;
  }
}
// One animation step of the smoke source. A resting particle (speed below
// 0.1) draws a single random-strength pixel and has a 1-in-256 chance to
// start rising; a moving particle draws a jittered 5x5 blob whose strength
// limit depends on its row. Afterwards the particle is moved up by its speed
// and, if the reset condition holds, put back on the emitter row at rest.
void AnimateFireSmoke(CTextureEffectSource *ptes)
{
  // scales a row number to a heat limit
  const FLOAT fRatio = 32.0f / (FLOAT)_pixBufferHeight;
  FireSmoke &fs =
    (*((FireSmoke *) ptes->tes_tespEffectSourceProperties.tesp_achDummy));

  // animate smoke points (each particle occupies every second array slot)
  for (INDEX iSlot=0; iSlot<SMOKE_POINTS*2; iSlot+=2) {
    FireSmokePoint &fspParticle = ((FireSmokePoint&) ptes->tes_atepPixels[iSlot]);

    // random jitter, computed as (random mod 3) - 1 and stored in a byte
    const UBYTE ubJitterU = RNDW%3 - 1;
    const UBYTE ubJitterV = RNDW%3 - 1;

    const BOOL bResting = (fspParticle.fSpeedV<0.1f);
    if (bResting) {
      PutPixelUBYTE_FIRE((PIX) fspParticle.fpixU, (PIX) fspParticle.fpixV, RNDW%128);
    } else {
      const int iHeatLimit = int(fspParticle.fpixV*fRatio+1);
      PutPixel25UBYTE_FIRE((PIX) fspParticle.fpixU+ubJitterU,
                           (PIX) fspParticle.fpixV+ubJitterV, RNDW%iHeatLimit);
    }

    // start moving up
    if (bResting && (RNDW&255)==0) {
      fspParticle.fSpeedV = 1.0f;
    }

    // move up
    fspParticle.fpixV -= fspParticle.fSpeedV;

    // at the end of texture go on bottom
    // NOTE(review): this compares against the buffer HEIGHT, which looks true
    // for any particle inside the buffer, so a particle would be reset in the
    // same frame it starts moving. A test against the top edge may have been
    // intended. Behaviour kept as is -- confirm.
    if (fspParticle.fpixV<=(FLOAT)_pixBufferHeight) {
      fspParticle.fpixV   = fs.fpixV;
      fspParticle.fSpeedV = 0.0f;
    }
  }
}
///////////////// Water
void InitializeWater(void)
{
Randomize( (ULONG)(_pTimer->GetHighPrecisionTimer().GetMilliseconds()));
}
/*******************************
Water Animation
********************************/
// Advances the water height map by one step.
// For every cell the new height is
//   (sum of the four neighbours in the previous map) / 2 - (cell's own value in pNew),
// then damped by subtracting (value >> slDensity). Borders wrap around to the
// opposite edge. The result is written into td_pubBuffer1, after which the
// two buffers of the effect texture are swapped.
// slDensity: damping shift; larger values damp less.
// The work is timed under the STI_EFFECTRENDER statistics timer.
static void AnimateWater( SLONG slDensity)
{
_sfStats.StartTimer(CStatForm::STI_EFFECTRENDER);
/////////////////////////////////// move water
// pNew is both read (own value) and overwritten with the result;
// pOld supplies the neighbour heights
SWORD *pNew = (SWORD*)_ptdEffect->td_pubBuffer1;
SWORD *pOld = (SWORD*)_ptdEffect->td_pubBuffer2;
PIX pixV, pixU;
PIX pixOffset, iNew;
SLONG slLineAbove, slLineBelow, slLineLeft, slLineRight;
// inner rectangle (without 1 pixel top and bottom line)
// The loop walks linearly through (height-2)*width cells starting at offset
// width+1, taking left/right neighbours at offset -1/+1 in linear order.
// NOTE(review): because it starts at width+1 rather than width, the cell at
// offset width is never visited and the last visited cell is
// (height-1)*width, which is computed again as a corner below; the last
// iteration also reads pOld[height*width]. RenderWater() copies two rows to
// that offset of buffer 1, which suggests the buffers carry extra rows --
// confirm both points.
pixOffset = _pixBufferWidth + 1;
for( pixV=_pixBufferHeight-2; pixV>0; pixV--) {
for( pixU=_pixBufferWidth; pixU>0; pixU--) {
iNew = (( (SLONG)pOld[pixOffset - _pixBufferWidth]
+ (SLONG)pOld[pixOffset + _pixBufferWidth]
+ (SLONG)pOld[pixOffset - 1]
+ (SLONG)pOld[pixOffset + 1]
) >> 1)
- (SLONG)pNew[pixOffset];
pNew[pixOffset] = iNew - (iNew >> slDensity);
pixOffset++;
}
}
// upper horizontal border (without corners)
// "above" wraps to the last row of the map
slLineAbove = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
slLineBelow = _pixBufferWidth + 1;
slLineLeft = 0;
slLineRight = 2;
pixOffset = 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
iNew = (( (SLONG)pOld[slLineAbove]
+ (SLONG)pOld[slLineBelow]
+ (SLONG)pOld[slLineLeft]
+ (SLONG)pOld[slLineRight]
) >> 1)
- (SLONG)pNew[pixOffset];
pNew[pixOffset] = iNew - (iNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// lower horizontal border (without corners)
// "below" wraps to the first row of the map
slLineAbove = ((_pixBufferHeight-2)*_pixBufferWidth) + 1;
slLineBelow = 1;
slLineLeft = (_pixBufferHeight-1)*_pixBufferWidth;
slLineRight = ((_pixBufferHeight-1)*_pixBufferWidth) + 2;
pixOffset = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
iNew = (( (SLONG)pOld[slLineAbove]
+ (SLONG)pOld[slLineBelow]
+ (SLONG)pOld[slLineLeft]
+ (SLONG)pOld[slLineRight]
) >> 1)
- (SLONG)pNew[pixOffset];
pNew[pixOffset] = iNew - (iNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// top-left corner (row 0, column 0); neighbours: below, above (wrapped),
// right, left (wrapped)
iNew = (( (SLONG)pOld[_pixBufferWidth]
+ (SLONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth]
+ (SLONG)pOld[1]
+ (SLONG)pOld[_pixBufferWidth-1]
) >> 1)
- (SLONG)pNew[0];
pNew[0] = iNew - (iNew >> slDensity);
// top-right corner (row 0, last column); neighbours: below, above (wrapped),
// right (wrapped), left
iNew = (( (SLONG)pOld[(2*_pixBufferWidth) - 1]
+ (SLONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
+ (SLONG)pOld[0]
+ (SLONG)pOld[_pixBufferWidth-2]
) >> 1)
- (SLONG)pNew[_pixBufferWidth-1];
pNew[_pixBufferWidth-1] = iNew - (iNew >> slDensity);
// bottom-left corner (last row, column 0); neighbours: below (wrapped),
// above, right, left (wrapped)
iNew = (( (SLONG)pOld[0]
+ (SLONG)pOld[(_pixBufferHeight-2)*_pixBufferWidth]
+ (SLONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) + 1]
+ (SLONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
) >> 1)
- (SLONG)pNew[(_pixBufferHeight-1)*_pixBufferWidth];
pNew[(_pixBufferHeight-1)*_pixBufferWidth] = iNew - (iNew >> slDensity);
// bottom-right corner (last row, last column); neighbours: below (wrapped),
// above, right (wrapped), left
iNew = (( (SLONG)pOld[_pixBufferWidth-1]
+ (SLONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) - 1]
+ (SLONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth]
+ (SLONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 2]
) >> 1)
- (SLONG)pNew[(_pixBufferHeight*_pixBufferWidth) - 1];
pNew[(_pixBufferHeight*_pixBufferWidth) - 1] = iNew - (iNew >> slDensity);
// swap buffers
Swap( _ptdEffect->td_pubBuffer1, _ptdEffect->td_pubBuffer2);
_sfStats.StopTimer(CStatForm::STI_EFFECTRENDER);
}
//////////////////////////// displace texture
// PIXEL(u,v): fetches one texel from the base texture at column u, row v.
// Both coordinates are wrapped by AND-ing them with mmBaseWidthMask / mmBaseHeightMask.
// The masks are 64-bit variables that are read here through an SLONG reference, i.e. only
// their first four bytes in memory are used (the low half on little-endian targets).
// The macro expects 'pulTextureBase' and 'pixBaseWidth' to be visible at the point of use.
#define PIXEL(u,v) pulTextureBase[ ((u)&(SLONG&)mmBaseWidthMask) + ((v)&(SLONG&)mmBaseHeightMask) *pixBaseWidth]
// File-scope scratch copies of RenderWater() values; the GNU inline-asm variant of that
// function accesses them by symbol name because it has no spare registers for them.
static ULONG _slHeightMapStep_renderWater = 0;
static PIX _pixBaseWidth_renderWater = 0;
// Silence MSVC warning 4731 for the inline assembly in the function that follows
// (re-enabled with '#pragma warning(default: 4731)' after it).
#pragma warning(disable: 4731)
// Renders the water effect into the effect texture (_ptdEffect->td_pulFrames).
// Every destination texel is copied from the base texture (_ptdBase, at mip level
// _iWantedMipLevel) at a position displaced by the local differences of the 16-bit
// height map kept in _ptdEffect->td_pubBuffer1. Base texture coordinates are wrapped
// with the width/height masks.
//
// One of three routines is chosen from the ratio of height-map width to texture width:
//   _pixBufferWidth >= _pixTexWidth    -> sub-sampling (one height sample per texel)
//   _pixBufferWidth*2 == _pixTexWidth  -> bilinear super-sampling, 2x2 texels per sample
//   _pixBufferWidth*4 == _pixTexWidth  -> bilinear super-sampling, 4x4 texels per sample
// Any other ratio only raises an assertion and renders nothing.
//
// Each routine exists as MSVC inline asm (MMX), GNU inline asm (MMX) and portable C;
// the variant is selected at compile time via ASMOPT / __MSVC_INLINE__ / __GNU_INLINE__.
// NOTE(review): the GNU variants use literal shift counts ($3..$7) where the MSVC variants
// use DISTORTION-based expressions; they agree only if DISTORTION is 3 - confirm.
static void RenderWater(void)
{
  _sfStats.StartTimer(CStatForm::STI_EFFECTRENDER);

  // both frame buffers must exist before any pointer is derived from them
  ASSERT( _ptdEffect->td_pulFrames!=NULL && _ptdBase->td_pulFrames!=NULL);

  // get textures' parameters
  ULONG *pulTexture = _ptdEffect->td_pulFrames;
  PIX pixBaseWidth = _ptdBase->GetPixWidth();
  PIX pixBaseHeight = _ptdBase->GetPixHeight();
  ULONG *pulTextureBase = _ptdBase->td_pulFrames
                        + GetMipmapOffset( _iWantedMipLevel, pixBaseWidth, pixBaseHeight);
  // dimensions of the base texture at the wanted mip level, and the matching wrap masks
  pixBaseWidth >>= _iWantedMipLevel;
  pixBaseHeight >>= _iWantedMipLevel;
  mmBaseWidthMask = pixBaseWidth -1;
  mmBaseHeightMask = pixBaseHeight-1;

  SWORD *pswHeightMap = (SWORD*)_ptdEffect->td_pubBuffer1; // height map pointer

  // copy top 2 lines from height map to bottom (so no mask offset will be needed)
  // NOTE(review): this writes two rows past _pixBufferHeight rows - presumably the buffer
  // is allocated with that extra room; verify against the allocation site.
  memcpy( (void*)(pswHeightMap+(_pixBufferHeight*_pixBufferWidth)), (void*)pswHeightMap,
          _pixBufferWidth*sizeof(SWORD)*2);

  // execute corresponding displace routine
  if( _pixBufferWidth >= _pixTexWidth)
  { // SUB-SAMPLING
    // initialized so that the GNU variant never reads indeterminate values
    SLONG slHeightMapStep = 0, slHeightRowStep = 0;

#if ASMOPT == 1

 #if (defined __MSVC_INLINE__)
    // slHeightMapStep/slHeightRowStep are byte offsets here (height map entries are 2 bytes)
    __asm {
    push ebx
    bsf ecx,D [_pixTexWidth]
    dec ecx
    mov eax,D [_pixBufferWidth]
    sar eax,cl
    mov D [slHeightMapStep],eax
    bsf edx,eax
    add edx,DISTORTION+2-1
    mov D [mmShift],edx
    sub eax,2
    imul eax,D [_pixBufferWidth]
    mov D [slHeightRowStep],eax
    mov eax,D [pixBaseWidth]
    mov edx,D [pixBaseHeight]
    shl edx,16
    or eax,edx
    sub eax,0x00010001
    mov D [mmBaseMasks],eax
    mov eax,D [pixBaseWidth]
    shl eax,16
    or eax,1
    mov D [mmBaseWidth],eax
    mov ebx,D [pswHeightMap]
    mov esi,D [pulTextureBase]
    mov edi,D [pulTexture]
    pxor mm6,mm6 // MM6 = 0 | 0 || pixV | pixU
    mov eax,D [_pixBufferWidth]
    mov edx,D [_pixTexHeight]
    rowLoop:
    push edx
    mov ecx,D [_pixTexWidth]
    pixLoop:
    movd mm1,D [ebx]
    movd mm3,D [ebx+ eax*2]
    movq mm2,mm1
    psubw mm3,mm1
    pslld mm1,16
    psubw mm2,mm1
    pand mm2,Q [mm00M0]
    por mm2,mm3
    psraw mm2,Q [mmShift]
    paddw mm2,mm6
    pand mm2,Q [mmBaseMasks]
    pmaddwd mm2,Q [mmBaseWidth]
    movd edx,mm2
    mov edx,D [esi+ edx*4]
    mov D [edi],edx
    // advance to next texture pixel
    add ebx,D [slHeightMapStep]
    add edi,4
    paddd mm6,Q [mm0001]
    dec ecx
    jnz pixLoop
    // advance to next texture row
    pop edx
    add ebx,D [slHeightRowStep]
    paddd mm6,Q [mm0010]
    dec edx
    jnz rowLoop
    emms
    pop ebx
    }

 #elif (defined __GNU_INLINE__)
    // rcg12152001 needed extra registers. :(
    _slHeightMapStep_renderWater = slHeightMapStep;
    _pixBaseWidth_renderWater = pixBaseWidth;
    // Stack layout after the six pushes below (offsets from %esp):
    //   0: slHeightRowStep  4: pulTextureBase  8: pulTexture
    //  12: pswHeightMap    16: pixBaseWidth   20: pixBaseHeight
    __asm__ __volatile__ (
    "pushl %%ebx \n\t" // GCC needs this.
    "movl (" ASMSYM(_pixBaseWidth_renderWater) "),%%ebx \n\t"
    "pushl %%eax \n\t" // pixBaseHeight
    "pushl %%ebx \n\t" // pixBaseWidth
    "pushl %%ecx \n\t" // pswHeightMap
    "pushl %%edx \n\t" // pulTexture
    "pushl %%esi \n\t" // pulTextureBase
    "pushl %%edi \n\t" // slHeightRowStep
    "bsfl (" ASMSYM(_pixTexWidth) "), %%ecx \n\t"
    "decl %%ecx \n\t"
    "movl (" ASMSYM(_pixBufferWidth) "), %%eax \n\t"
    "sarl %%cl, %%eax \n\t"
    "movl %%eax, (" ASMSYM(_slHeightMapStep_renderWater) ") \n\t"
    "bsfl %%eax, %%edx \n\t"
    "addl $4, %%edx \n\t"
    "movl %%edx, (" ASMSYM(mmShift) ") \n\t"
    "subl $2, %%eax \n\t"
    "imul (" ASMSYM(_pixBufferWidth) "), %%eax \n\t"
    "movl %%eax, (%%esp) \n\t" // slHeightRowStep
    "movl 16(%%esp), %%eax \n\t" // pixBaseWidth
    "movl 20(%%esp), %%edx \n\t" // pixBaseHeight
    "shll $16, %%edx \n\t"
    "orl %%edx, %%eax \n\t"
    "subl $0x00010001, %%eax \n\t"
    "movl %%eax, (" ASMSYM(mmBaseMasks) ") \n\t"
    "movl 16(%%esp), %%eax \n\t" // pixBaseWidth
    "shl $16, %%eax \n\t"
    "orl $1, %%eax \n\t"
    "movl %%eax, (" ASMSYM(mmBaseWidth) ") \n\t"
    "movl 12(%%esp), %%ebx \n\t" // pswHeightMap
    "movl 4(%%esp), %%esi \n\t" // pulTextureBase
    "movl 8(%%esp), %%edi \n\t" // pulTexture
    "pxor %%mm6, %%mm6 \n\t" // MM6 = 0 | 0 || pixV | pixU
    "movl (" ASMSYM(_pixBufferWidth) "), %%eax \n\t"
    "movl (" ASMSYM(_pixTexHeight) "), %%edx \n\t"
    "0: \n\t" // rowLoop
    "pushl %%edx \n\t"
    "movl (" ASMSYM(_pixTexWidth) "), %%ecx \n\t"
    "1: \n\t" // pixLoop
    "movd (%%ebx), %%mm1 \n\t"
    "movd (%%ebx, %%eax, 2), %%mm3 \n\t"
    "movq %%mm1, %%mm2 \n\t"
    "psubw %%mm1, %%mm3 \n\t"
    "pslld $16, %%mm1 \n\t"
    "psubw %%mm1, %%mm2 \n\t"
    "pand (" ASMSYM(mm00M0) "), %%mm2 \n\t"
    "por %%mm3, %%mm2 \n\t"
    "psraw (" ASMSYM(mmShift) "), %%mm2 \n\t"
    "paddw %%mm6, %%mm2 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm2 \n\t"
    "pmaddwd (" ASMSYM(mmBaseWidth) "), %%mm2 \n\t"
    "movd %%mm2, %%edx \n\t"
    "movl (%%esi, %%edx, 4), %%edx \n\t"
    "movl %%edx, (%%edi) \n\t"
    // advance to next texture pixel
    "addl (" ASMSYM(_slHeightMapStep_renderWater) "), %%ebx \n\t"
    "addl $4, %%edi \n\t"
    "paddd (" ASMSYM(mm0001) "), %%mm6 \n\t"
    "decl %%ecx \n\t"
    "jnz 1b \n\t" // pixLoop
    // advance to next texture row
    "popl %%edx \n\t"
    "addl (%%esp), %%ebx \n\t" // slHeightRowStep
    "paddd (" ASMSYM(mm0010) "), %%mm6 \n\t"
    "decl %%edx \n\t"
    "jnz 0b \n\t" // rowLoop
    "addl $24, %%esp \n\t" // lose our locals...
    "popl %%ebx \n\t" // restore GCC's register.
    "emms \n\t"
    : // no outputs.
    : "a" (pixBaseHeight), "c" (pswHeightMap),
      "d" (pulTexture), "S" (pulTextureBase), "D" (slHeightRowStep)
    : "cc", "memory"
    );

 #else
  #error fill in for your platform.
 #endif

#else

    PIX pixPos, pixDU, pixDV;
    // One texel is produced per step, for _pixTexWidth texels per row, so the step (in
    // height-map entries) has to come from the effect texture width - exactly what the
    // asm variants compute - and not from the base texture width.
    slHeightMapStep = _pixBufferWidth/_pixTexWidth;
    // after a texture row the pointer has advanced one full height-map row;
    // skip the (step-1) rows that are not sampled
    slHeightRowStep = (slHeightMapStep-1)*_pixBufferWidth;
    mmShift = DISTORTION+ FastLog2(slHeightMapStep) +2;
    for( PIX pixV=0; pixV<_pixTexHeight; pixV++)
    { // row loop
      for( PIX pixU=0; pixU<_pixTexWidth; pixU++)
      { // texel loop
        pixPos = pswHeightMap[0];
        // displacement = scaled difference to the right / lower neighbour
        pixDU = (pswHeightMap[1] - pixPos) >>(SLONG&)mmShift;
        pixDV = (pswHeightMap[_pixBufferWidth] - pixPos) >>(SLONG&)mmShift;
        // displaced and wrapped base texture coordinates
        pixDU = (pixU +pixDU) & (SLONG&)mmBaseWidthMask;
        pixDV = (pixV +pixDV) & (SLONG&)mmBaseHeightMask;
        *pulTexture++ = pulTextureBase[pixDV*pixBaseWidth + pixDU];
        // advance to next texel in height map
        pswHeightMap += slHeightMapStep;
      }
      pswHeightMap += slHeightRowStep;
    }

#endif
  }
  else if( _pixBufferWidth*2 == _pixTexWidth)
  { // BILINEAR SUPER-SAMPLING 2

#if ASMOPT == 1

 #if (defined __MSVC_INLINE__)
    __asm {
    push ebx
    bsf eax,D [pixBaseWidth]
    mov edx,32
    sub edx,eax
    mov D [mmBaseWidthShift],edx
    movq mm0,Q [mmBaseHeightMask]
    psllq mm0,32
    por mm0,Q [mmBaseWidthMask]
    movq Q [mmBaseMasks],mm0
    pxor mm6,mm6 // MM6 = pixV|pixU
    mov ebx,D [pswHeightMap]
    mov esi,D [pulTextureBase]
    mov edi,D [pulTexture]
    mov edx,D [_pixBufferHeight]
    rowLoop2:
    push edx
    mov edx,D [_pixTexWidth]
    mov ecx,D [_pixBufferWidth]
    pixLoop2:
    mov eax,D [_pixBufferWidth]
    movd mm1,D [ebx+ 2]
    movd mm0,D [ebx+ eax*2]
    psllq mm0,32
    por mm1,mm0
    movd mm0,D [ebx]
    punpckldq mm0,mm0
    psubd mm1,mm0
    movq mm0,mm6
    pslld mm0,DISTORTION+1+1
    paddd mm1,mm0 // MM1 = slV_00 | slU_00
    movd mm2,D [ebx+ 4]
    movd mm0,D [ebx+ eax*2 +2]
    psllq mm0,32
    por mm2,mm0
    movd mm0,D [ebx+ 2]
    punpckldq mm0,mm0
    psubd mm2,mm0
    movq mm0,mm6
    paddd mm0,Q [mm1LO]
    pslld mm0,DISTORTION+1+1
    paddd mm2,mm0 // MM2 = slV_01 | slU_01
    movd mm3,D [ebx+ eax*2 +2]
    movd mm0,D [ebx+ eax*4]
    psllq mm0,32
    por mm3,mm0
    movd mm0,D [ebx+ eax*2]
    punpckldq mm0,mm0
    psubd mm3,mm0
    movq mm0,mm6
    paddd mm0,Q [mm1HI]
    pslld mm0,DISTORTION+1+1
    paddd mm3,mm0 // MM3 = slV_10 | slU_10
    movd mm4,D [ebx+ eax*2 +4]
    movd mm0,D [ebx+ eax*4 +2]
    psllq mm0,32
    por mm4,mm0
    movd mm0,D [ebx+ eax*2 +2]
    punpckldq mm0,mm0
    psubd mm4,mm0
    movq mm0,mm6
    paddd mm0,Q [mm1HILO]
    pslld mm0,DISTORTION+1+1
    paddd mm4,mm0 // MM4 = slV_11 | slU_11
    movq mm0,mm1
    psrad mm0,DISTORTION+1+0
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi],eax
    movq mm0,mm1
    paddd mm0,mm2
    psrad mm0,DISTORTION+1+1
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ 4],eax
    movq mm0,mm1
    paddd mm0,mm3
    psrad mm0,DISTORTION+1+1
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*4],eax
    paddd mm1,mm2
    paddd mm1,mm3
    paddd mm1,mm4
    psrad mm1,DISTORTION+1+2
    pand mm1,Q [mmBaseMasks]
    movq mm7,mm1
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm1,mm7
    movd eax,mm1
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*4 +4],eax
    // advance to next texture pixels
    paddd mm6,Q [mm1LO]
    add edi,8
    add ebx,2
    dec ecx
    jnz pixLoop2
    // advance to next texture row
    lea edi,[edi+ edx*4]
    pop edx
    paddd mm6,Q [mm1HI]
    dec edx
    jnz rowLoop2
    emms
    pop ebx
    }

 #elif (defined __GNU_INLINE__)
    __asm__ __volatile__ (
    "pushl %%ebx \n\t" // GCC's register.
    "movl %%ecx, %%ebx \n\t"
    "bsfl %%eax, %%eax \n\t" // pixBaseWidth
    "movl $32, %%edx \n\t"
    "subl %%eax, %%edx \n\t"
    "movl %%edx, (" ASMSYM(mmBaseWidthShift) ") \n\t"
    "movq (" ASMSYM(mmBaseHeightMask) "), %%mm0 \n\t"
    "psllq $32, %%mm0 \n\t"
    "por (" ASMSYM(mmBaseWidthMask) "), %%mm0 \n\t"
    "movq %%mm0, (" ASMSYM(mmBaseMasks) ") \n\t"
    "pxor %%mm6, %%mm6 \n\t" // MM6 = pixV|pixU
    // (These registers were loaded here in the original version...)
    //"movl (pswHeightMap), %%ebx \n\t"
    //"movl (pulTextureBase), %%esi \n\t"
    //"movl (pulTexture), %%edi \n\t"
    "movl (" ASMSYM(_pixBufferHeight) "), %%edx \n\t"
    "0: \n\t" // rowLoop2
    "pushl %%edx \n\t"
    "movl (" ASMSYM(_pixTexWidth) "), %%edx \n\t"
    "movl (" ASMSYM(_pixBufferWidth) "), %%ecx \n\t"
    "1: \n\t" // pixLoop2
    "mov (" ASMSYM(_pixBufferWidth) "), %%eax \n\t"
    "movd 2(%%ebx), %%mm1 \n\t"
    "movd 0(%%ebx, %%eax, 2), %%mm0 \n\t"
    "psllq $32, %%mm0 \n\t"
    "por %%mm0, %%mm1 \n\t"
    "movd (%%ebx), %%mm0 \n\t"
    "punpckldq %%mm0, %%mm0 \n\t"
    "psubd %%mm0, %%mm1 \n\t"
    "movq %%mm6, %%mm0 \n\t"
    "pslld $5, %%mm0 \n\t"
    "paddd %%mm0, %%mm1 \n\t" // MM1 = slV_00 | slU_00
    "movd 4(%%ebx), %%mm2 \n\t"
    "movd 2(%%ebx, %%eax, 2), %%mm0 \n\t"
    "psllq $32, %%mm0 \n\t"
    "por %%mm0, %%mm2 \n\t"
    "movd 2(%%ebx), %%mm0 \n\t"
    "punpckldq %%mm0, %%mm0 \n\t"
    "psubd %%mm0, %%mm2 \n\t"
    "movq %%mm6, %%mm0 \n\t"
    "paddd (" ASMSYM(mm1LO) "), %%mm0 \n\t"
    "pslld $5, %%mm0 \n\t"
    "paddd %%mm0, %%mm2 \n\t" // MM2 = slV_01 | slU_01
    "movd 2(%%ebx, %%eax, 2), %%mm3 \n\t"
    "movd (%%ebx, %%eax, 4), %%mm0 \n\t"
    "psllq $32, %%mm0 \n\t"
    "por %%mm0, %%mm3 \n\t"
    "movd (%%ebx, %%eax, 2), %%mm0 \n\t"
    "punpckldq %%mm0, %%mm0 \n\t"
    "psubd %%mm0, %%mm3 \n\t"
    "movq %%mm6, %%mm0 \n\t"
    "paddd (" ASMSYM(mm1HI) "), %%mm0 \n\t"
    "pslld $5, %%mm0 \n\t"
    "paddd %%mm0, %%mm3 \n\t" // MM3 = slV_10 | slU_10
    "movd 4(%%ebx, %%eax, 2), %%mm4 \n\t"
    "movd 2(%%ebx, %%eax, 4), %%mm0 \n\t"
    "psllq $32, %%mm0 \n\t"
    "por %%mm0, %%mm4 \n\t"
    "movd 2(%%ebx, %%eax, 2), %%mm0 \n\t"
    "punpckldq %%mm0, %%mm0 \n\t"
    "psubd %%mm0, %%mm4 \n\t"
    "movq %%mm6, %%mm0 \n\t"
    "paddd (" ASMSYM(mm1HILO) "), %%mm0 \n\t"
    "pslld $5, %%mm0 \n\t"
    "paddd %%mm0, %%mm4 \n\t" // MM4 = slV_11 | slU_11
    "movq %%mm1, %%mm0 \n\t"
    "psrad $4, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, (%%edi) \n\t"
    "movq %%mm1, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "psrad $5, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, 4(%%edi) \n\t"
    "movq %%mm1, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "psrad $5, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, (%%edi, %%edx, 4) \n\t"
    "paddd %%mm2, %%mm1 \n\t"
    "paddd %%mm3, %%mm1 \n\t"
    "paddd %%mm4, %%mm1 \n\t"
    "psrad $6, %%mm1 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm1 \n\t"
    "movq %%mm1, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm1 \n\t"
    "movd %%mm1, %%eax \n\t"
    "mov (%%esi, %%eax, 4), %%eax \n\t"
    "mov %%eax, 4(%%edi, %%edx, 4) \n\t"
    // advance to next texture pixels
    "paddd (" ASMSYM(mm1LO) "), %%mm6 \n\t"
    "addl $8, %%edi \n\t"
    "addl $2, %%ebx \n\t"
    "decl %%ecx \n\t"
    "jnz 1b \n\t" // pixLoop2
    // advance to next texture row
    "leal (%%edi, %%edx, 4), %%edi \n\t"
    "popl %%edx \n\t"
    "paddd (" ASMSYM(mm1HI) "), %%mm6 \n\t"
    "decl %%edx \n\t"
    "jnz 0b \n\t" // rowLoop2
    "popl %%ebx \n\t" // GCC's value.
    "emms \n\t"
    : // no outputs.
    : "a" (pixBaseWidth), "c" (pswHeightMap),
      "S" (pulTextureBase), "D" (pulTexture)
    : "edx", "cc", "memory"
    );

 #else
  #error fill in for your platform.
 #endif

#else

    // slU_xy/slV_xy: fixed-point base texture coordinates at the four corners of the
    // current height-map cell (x = column 0/1, y = row 0/1 as encoded in the names)
    SLONG slU_00, slU_01, slU_10, slU_11;
    SLONG slV_00, slV_01, slV_10, slV_11;
    for( PIX pixV=0; pixV<_pixBufferHeight; pixV++)
    { // row loop
      for( PIX pixU=0; pixU<_pixBufferWidth; pixU++)
      { // texel loop
        slU_00 = pswHeightMap[_pixBufferWidth*0+1] - pswHeightMap[_pixBufferWidth*0+0] + ((pixU+0)<<(DISTORTION+1+1));
        slV_00 = pswHeightMap[_pixBufferWidth*1+0] - pswHeightMap[_pixBufferWidth*0+0] + ((pixV+0)<<(DISTORTION+1+1));
        slU_01 = pswHeightMap[_pixBufferWidth*0+2] - pswHeightMap[_pixBufferWidth*0+1] + ((pixU+1)<<(DISTORTION+1+1));
        slV_01 = pswHeightMap[_pixBufferWidth*1+1] - pswHeightMap[_pixBufferWidth*0+1] + ((pixV+0)<<(DISTORTION+1+1));
        slU_10 = pswHeightMap[_pixBufferWidth*1+1] - pswHeightMap[_pixBufferWidth*1+0] + ((pixU+0)<<(DISTORTION+1+1));
        slV_10 = pswHeightMap[_pixBufferWidth*2+0] - pswHeightMap[_pixBufferWidth*1+0] + ((pixV+1)<<(DISTORTION+1+1));
        slU_11 = pswHeightMap[_pixBufferWidth*1+2] - pswHeightMap[_pixBufferWidth*1+1] + ((pixU+1)<<(DISTORTION+1+1));
        slV_11 = pswHeightMap[_pixBufferWidth*2+1] - pswHeightMap[_pixBufferWidth*1+1] + ((pixV+1)<<(DISTORTION+1+1));
        // 2x2 destination texels: corner, horizontal midpoint, vertical midpoint, centre
        pulTexture[_pixTexWidth*0+0] = PIXEL( (slU_00 ) >>(DISTORTION+1 ), (slV_00 ) >>(DISTORTION+1 ) );
        pulTexture[_pixTexWidth*0+1] = PIXEL( (slU_00+slU_01 ) >>(DISTORTION+1+1), (slV_00+slV_01 ) >>(DISTORTION+1+1) );
        pulTexture[_pixTexWidth*1+0] = PIXEL( (slU_00 +slU_10 ) >>(DISTORTION+1+1), (slV_00 +slV_10 ) >>(DISTORTION+1+1) );
        pulTexture[_pixTexWidth*1+1] = PIXEL( (slU_00+slU_01+slU_10+slU_11) >>(DISTORTION+1+2), (slV_00+slV_01+slV_10+slV_11) >>(DISTORTION+1+2) );
        // advance to next texel
        pulTexture+=2;
        pswHeightMap++;
      }
      // skip the second texture row that was written together with the first
      pulTexture+=_pixTexWidth;
    }

#endif
  }
  else if( _pixBufferWidth*4 == _pixTexWidth)
  { // BILINEAR SUPER-SAMPLING 4

#if ASMOPT == 1

 #if (defined __MSVC_INLINE__)
    __asm {
    push ebx
    bsf eax,D [pixBaseWidth]
    mov edx,32
    sub edx,eax
    mov D [mmBaseWidthShift],edx
    movq mm0,Q [mmBaseHeightMask]
    psllq mm0,32
    por mm0,Q [mmBaseWidthMask]
    movq Q [mmBaseMasks],mm0
    pxor mm6,mm6 // MM6 = pixV|pixU
    mov ebx,D [pswHeightMap]
    mov esi,D [pulTextureBase]
    mov edi,D [pulTexture]
    mov edx,D [_pixBufferHeight]
    rowLoop4:
    push edx
    mov ecx,D [_pixBufferWidth]
    pixLoop4:
    mov eax,D [_pixBufferWidth]
    mov edx,D [_pixTexWidth]
    movd mm1,D [ebx+ 2]
    movd mm0,D [ebx+ eax*2]
    psllq mm0,32
    por mm1,mm0
    movd mm0,D [ebx]
    punpckldq mm0,mm0
    psubd mm1,mm0
    movq mm0,mm6
    pslld mm0,DISTORTION+1+1
    paddd mm1,mm0 // MM1 = slV_00 | slU_00
    movd mm2,D [ebx+ 4]
    movd mm0,D [ebx+ eax*2 +2]
    psllq mm0,32
    por mm2,mm0
    movd mm0,D [ebx+ 2]
    punpckldq mm0,mm0
    psubd mm2,mm0
    movq mm0,mm6
    paddd mm0,Q [mm1LO]
    pslld mm0,DISTORTION+1+1
    paddd mm2,mm0 // MM2 = slV_01 | slU_01
    movd mm3,D [ebx+ eax*2 +2]
    movd mm0,D [ebx+ eax*4]
    psllq mm0,32
    por mm3,mm0
    movd mm0,D [ebx+ eax*2]
    punpckldq mm0,mm0
    psubd mm3,mm0
    movq mm0,mm6
    paddd mm0,Q [mm1HI]
    pslld mm0,DISTORTION+1+1
    paddd mm3,mm0 // MM3 = slV_10 | slU_10
    movd mm4,D [ebx+ eax*2 +4]
    movd mm0,D [ebx+ eax*4 +2]
    psllq mm0,32
    por mm4,mm0
    movd mm0,D [ebx+ eax*2 +2]
    punpckldq mm0,mm0
    psubd mm4,mm0
    movq mm0,mm6
    paddd mm0,Q [mm1HILO]
    pslld mm0,DISTORTION+1+1
    paddd mm4,mm0 // MM4 = slV_11 | slU_11
    // texel 00
    movq mm0,mm1
    psrad mm0,DISTORTION
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi],eax
    // texel 01
    movq mm0,mm1
    paddd mm0,mm1
    paddd mm0,mm1
    paddd mm0,mm2
    psrad mm0,DISTORTION+2
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi +4],eax
    // texel 02
    movq mm0,mm1
    paddd mm0,mm2
    psrad mm0,DISTORTION+1
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi +8],eax
    // texel 03
    movq mm0,mm1
    paddd mm0,mm2
    paddd mm0,mm2
    paddd mm0,mm2
    psrad mm0,DISTORTION+2
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi +12],eax
    // texel 10
    movq mm0,mm1
    paddd mm0,mm1
    paddd mm0,mm1
    paddd mm0,mm3
    psrad mm0,DISTORTION+2
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*4],eax
    // texel 11
    movq mm0,mm1
    pslld mm0,3
    paddd mm0,mm1
    paddd mm0,mm2
    paddd mm0,mm2
    paddd mm0,mm2
    paddd mm0,mm3
    paddd mm0,mm3
    paddd mm0,mm3
    paddd mm0,mm4
    psrad mm0,DISTORTION+4
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*4 +4],eax
    // texel 12
    movq mm0,mm1
    paddd mm0,mm0
    paddd mm0,mm1
    paddd mm0,mm2
    paddd mm0,mm2
    paddd mm0,mm2
    paddd mm0,mm3
    paddd mm0,mm4
    psrad mm0,DISTORTION+3
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*4 +8],eax
    // texel 13
    movq mm0,mm2
    pslld mm0,3
    paddd mm0,mm2
    paddd mm0,mm1
    paddd mm0,mm1
    paddd mm0,mm1
    paddd mm0,mm3
    paddd mm0,mm4
    paddd mm0,mm4
    paddd mm0,mm4
    psrad mm0,DISTORTION+4
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*4 +12],eax
    // texel 20
    movq mm0,mm1
    paddd mm0,mm3
    psrad mm0,DISTORTION+1
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*8],eax
    // texel 21
    movq mm0,mm1
    paddd mm0,mm1
    paddd mm0,mm1
    paddd mm0,mm2
    paddd mm0,mm3
    paddd mm0,mm3
    paddd mm0,mm3
    paddd mm0,mm4
    psrad mm0,DISTORTION+3
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*8 +4],eax
    // texel 22
    movq mm0,mm1
    paddd mm0,mm2
    paddd mm0,mm3
    paddd mm0,mm4
    psrad mm0,DISTORTION+2
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*8 +8],eax
    // texel 23
    movq mm0,mm1
    paddd mm0,mm2
    paddd mm0,mm2
    paddd mm0,mm2
    paddd mm0,mm3
    paddd mm0,mm4
    paddd mm0,mm4
    paddd mm0,mm4
    psrad mm0,DISTORTION+3
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*8 +12],eax
    imul edx,3 // _pixTexWidth*=3
    // texel 30
    movq mm0,mm1
    paddd mm0,mm3
    paddd mm0,mm3
    paddd mm0,mm3
    psrad mm0,DISTORTION+2
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*4],eax
    // texel 31
    movq mm0,mm3
    pslld mm0,3
    paddd mm0,mm3
    paddd mm0,mm1
    paddd mm0,mm1
    paddd mm0,mm1
    paddd mm0,mm2
    paddd mm0,mm4
    paddd mm0,mm4
    paddd mm0,mm4
    psrad mm0,DISTORTION+4
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*4 +4],eax
    // texel 32
    movq mm0,mm4
    paddd mm0,mm0
    paddd mm0,mm4
    paddd mm0,mm3
    paddd mm0,mm3
    paddd mm0,mm3
    paddd mm0,mm2
    paddd mm0,mm1
    psrad mm0,DISTORTION+3
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*4 +8],eax
    // texel 33
    movq mm0,mm4
    pslld mm0,3
    paddd mm0,mm4
    paddd mm0,mm1
    paddd mm0,mm2
    paddd mm0,mm2
    paddd mm0,mm2
    paddd mm0,mm3
    paddd mm0,mm3
    paddd mm0,mm3
    psrad mm0,DISTORTION+4
    pand mm0,Q [mmBaseMasks]
    movq mm7,mm0
    psrlq mm7,Q [mmBaseWidthShift]
    paddd mm0,mm7
    movd eax,mm0
    mov eax,D [esi+ eax*4]
    mov D [edi+ edx*4 +12],eax
    // advance to next texture pixels
    paddd mm6,Q [mm1LO]
    add edi,16
    add ebx,2
    dec ecx
    jnz pixLoop4
    // advance to next texture row
    lea edi,[edi+ edx*4] // +=[_pixTexWidth]*3
    pop edx
    paddd mm6,Q [mm1HI]
    dec edx
    jnz rowLoop4
    emms
    pop ebx
    }

 #elif (defined __GNU_INLINE__)
    __asm__ __volatile__ (
    "pushl %%ebx \n\t" // GCC's register.
    "movl %%ecx, %%ebx \n\t"
    "bsfl %%eax, %%eax \n\t"
    "movl $32, %%edx \n\t"
    "subl %%eax, %%edx \n\t"
    "movl %%edx, (" ASMSYM(mmBaseWidthShift) ") \n\t"
    "movq (" ASMSYM(mmBaseHeightMask) "), %%mm0 \n\t"
    "psllq $32, %%mm0 \n\t"
    "por (" ASMSYM(mmBaseWidthMask) "), %%mm0 \n\t"
    "movq %%mm0, (" ASMSYM(mmBaseMasks) ") \n\t"
    "pxor %%mm6, %%mm6 \n\t" // MM6 = pixV|pixU
    // (These registers were loaded here in the original version...)
    //"movl (pswHeightMap), %%ebx \n\t"
    //"movl (pulTextureBase), %%esi \n\t"
    //"movl (pulTexture), %%edi \n\t"
    "movl (" ASMSYM(_pixBufferHeight) "), %%edx \n\t"
    "0: \n\t" // rowLoop4
    "pushl %%edx \n\t"
    "movl (" ASMSYM(_pixBufferWidth) "), %%ecx \n\t"
    "1: \n\t" // pixLoop4
    "movl (" ASMSYM(_pixBufferWidth) "), %%eax \n\t"
    "movl (" ASMSYM(_pixTexWidth) "), %%edx \n\t"
    "movd 2(%%ebx), %%mm1 \n\t"
    "movd (%%ebx, %%eax, 2), %%mm0 \n\t"
    "psllq $32, %%mm0 \n\t"
    "por %%mm0, %%mm1 \n\t"
    "movd (%%ebx), %%mm0 \n\t"
    "punpckldq %%mm0, %%mm0 \n\t"
    "psubd %%mm0, %%mm1 \n\t"
    "movq %%mm6, %%mm0 \n\t"
    "pslld $5, %%mm0 \n\t"
    "paddd %%mm0, %%mm1 \n\t" // MM1 = slV_00 | slU_00
    "movd 4(%%ebx), %%mm2 \n\t"
    "movd 2(%%ebx, %%eax, 2), %%mm0 \n\t"
    "psllq $32, %%mm0 \n\t"
    "por %%mm0, %%mm2 \n\t"
    "movd 2(%%ebx), %%mm0 \n\t"
    "punpckldq %%mm0, %%mm0 \n\t"
    "psubd %%mm0, %%mm2 \n\t"
    "movq %%mm6, %%mm0 \n\t"
    "paddd (" ASMSYM(mm1LO) "), %%mm0 \n\t"
    "pslld $5, %%mm0 \n\t"
    "paddd %%mm0, %%mm2 \n\t" // MM2 = slV_01 | slU_01
    "movd 2(%%ebx, %%eax, 2), %%mm3 \n\t"
    "movd (%%ebx, %%eax, 4), %%mm0 \n\t"
    "psllq $32, %%mm0 \n\t"
    "por %%mm0, %%mm3 \n\t"
    "movd (%%ebx, %%eax, 2), %%mm0 \n\t"
    "punpckldq %%mm0, %%mm0 \n\t"
    "psubd %%mm0, %%mm3 \n\t"
    "movq %%mm6, %%mm0 \n\t"
    "paddd (" ASMSYM(mm1HI) "), %%mm0 \n\t"
    "pslld $5, %%mm0 \n\t"
    "paddd %%mm0, %%mm3 \n\t" // MM3 = slV_10 | slU_10
    "movd 4(%%ebx, %%eax, 2), %%mm4 \n\t"
    "movd 2(%%ebx, %%eax, 4), %%mm0 \n\t"
    "psllq $32, %%mm0 \n\t"
    "por %%mm0, %%mm4 \n\t"
    "movd 2(%%ebx, %%eax, 2), %%mm0 \n\t"
    "punpckldq %%mm0, %%mm0 \n\t"
    "psubd %%mm0, %%mm4 \n\t"
    "movq %%mm6, %%mm0 \n\t"
    "paddd (" ASMSYM(mm1HILO) "), %%mm0 \n\t"
    "pslld $5, %%mm0 \n\t"
    "paddd %%mm0, %%mm4 \n\t" // MM4 = slV_11 | slU_11
    // texel 00
    "movq %%mm1, %%mm0 \n\t"
    "psrad $3, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, (%%edi) \n\t"
    // texel 01
    "movq %%mm1, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "psrad $5, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, 4(%%edi) \n\t"
    // texel 02
    "movq %%mm1, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "psrad $4, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, 8(%%edi) \n\t"
    // texel 03
    "movq %%mm1, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "psrad $5, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, 12(%%edi) \n\t"
    // texel 10
    "movq %%mm1, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "psrad $5, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, (%%edi, %%edx, 4) \n\t"
    // texel 11
    "movq %%mm1, %%mm0 \n\t"
    "pslld $3, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "psrad $7, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, 4(%%edi, %%edx, 4) \n\t"
    // texel 12
    "movq %%mm1, %%mm0 \n\t"
    "paddd %%mm0, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "psrad $6, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, 8(%%edi, %%edx, 4) \n\t"
    // texel 13
    "movq %%mm2, %%mm0 \n\t"
    "pslld $3, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "psrad $7, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, 12(%%edi, %%edx, 4) \n\t"
    // texel 20
    "movq %%mm1, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "psrad $4, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, (%%edi, %%edx, 8) \n\t"
    // texel 21
    "movq %%mm1, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "psrad $6, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, 4(%%edi, %%edx, 8) \n\t"
    // texel 22
    "movq %%mm1, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "psrad $5, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, 8(%%edi, %%edx, 8) \n\t"
    // texel 23
    "movq %%mm1, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "psrad $6, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, 12(%%edi, %%edx, 8) \n\t"
    "imull $3, %%edx \n\t" // _pixTexWidth*=3
    // texel 30
    "movq %%mm1, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "psrad $5, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, (%%edi, %%edx, 4) \n\t"
    // texel 31
    "movq %%mm3, %%mm0 \n\t"
    "pslld $3, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "psrad $7, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, 4(%%edi, %%edx, 4) \n\t"
    // texel 32
    "movq %%mm4, %%mm0 \n\t"
    "paddd %%mm0, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "psrad $6, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, 8(%%edi, %%edx, 4) \n\t"
    // texel 33
    "movq %%mm4, %%mm0 \n\t"
    "pslld $3, %%mm0 \n\t"
    "paddd %%mm4, %%mm0 \n\t"
    "paddd %%mm1, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm2, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "paddd %%mm3, %%mm0 \n\t"
    "psrad $7, %%mm0 \n\t"
    "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
    "movq %%mm0, %%mm7 \n\t"
    "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
    "paddd %%mm7, %%mm0 \n\t"
    "movd %%mm0, %%eax \n\t"
    "movl (%%esi, %%eax, 4), %%eax \n\t"
    "movl %%eax, 12(%%edi, %%edx, 4) \n\t"
    // advance to next texture pixels
    "paddd (" ASMSYM(mm1LO) "), %%mm6 \n\t"
    "addl $16, %%edi \n\t"
    "addl $2, %%ebx \n\t"
    "decl %%ecx \n\t"
    "jnz 1b \n\t" // pixLoop4
    // advance to next texture row
    "leal (%%edi, %%edx, 4), %%edi \n\t"// +=[_pixTexWidth]*3
    "popl %%edx \n\t"
    "paddd (" ASMSYM(mm1HI) "), %%mm6 \n\t"
    "decl %%edx \n\t"
    "jnz 0b \n\t" // rowLoop4
    "popl %%ebx \n\t" // Restore GCC's value.
    "emms \n\t"
    : // no outputs.
    : "a" (pixBaseWidth), "c" (pswHeightMap),
      "S" (pulTextureBase), "D" (pulTexture)
    : "edx", "cc", "memory"
    );

 #else
  #error fill in for your platform.
 #endif

#else

    // slU_xy/slV_xy: fixed-point base texture coordinates at the four corners of the
    // current height-map cell; the 16 destination texels are weighted blends of them
    SLONG slU_00, slU_01, slU_10, slU_11;
    SLONG slV_00, slV_01, slV_10, slV_11;
    mmBaseWidthShift = FastLog2( pixBaseWidth); // faster multiplying with shift
    for( PIX pixV=0; pixV<_pixBufferHeight; pixV++)
    { // row loop
      for( PIX pixU=0; pixU<_pixBufferWidth; pixU++)
      { // texel loop
        slU_00 = pswHeightMap[_pixBufferWidth*0+1] - pswHeightMap[_pixBufferWidth*0+0] + ((pixU+0)<<(DISTORTION+2));
        slV_00 = pswHeightMap[_pixBufferWidth*1+0] - pswHeightMap[_pixBufferWidth*0+0] + ((pixV+0)<<(DISTORTION+2));
        slU_01 = pswHeightMap[_pixBufferWidth*0+2] - pswHeightMap[_pixBufferWidth*0+1] + ((pixU+1)<<(DISTORTION+2));
        slV_01 = pswHeightMap[_pixBufferWidth*1+1] - pswHeightMap[_pixBufferWidth*0+1] + ((pixV+0)<<(DISTORTION+2));
        slU_10 = pswHeightMap[_pixBufferWidth*1+1] - pswHeightMap[_pixBufferWidth*1+0] + ((pixU+0)<<(DISTORTION+2));
        slV_10 = pswHeightMap[_pixBufferWidth*2+0] - pswHeightMap[_pixBufferWidth*1+0] + ((pixV+1)<<(DISTORTION+2));
        slU_11 = pswHeightMap[_pixBufferWidth*1+2] - pswHeightMap[_pixBufferWidth*1+1] + ((pixU+1)<<(DISTORTION+2));
        slV_11 = pswHeightMap[_pixBufferWidth*2+1] - pswHeightMap[_pixBufferWidth*1+1] + ((pixV+1)<<(DISTORTION+2));
        // texture row 0
        pulTexture[_pixTexWidth*0+0] = PIXEL( (slU_00 ) >>(DISTORTION ), (slV_00 ) >>(DISTORTION ) );
        pulTexture[_pixTexWidth*0+1] = PIXEL( (slU_00* 3+slU_01* 1 ) >>(DISTORTION+2), (slV_00* 3+slV_01* 1 ) >>(DISTORTION+2) );
        pulTexture[_pixTexWidth*0+2] = PIXEL( (slU_00 +slU_01 ) >>(DISTORTION+1), (slV_00 +slV_01 ) >>(DISTORTION+1) );
        pulTexture[_pixTexWidth*0+3] = PIXEL( (slU_00* 1+slU_01* 3 ) >>(DISTORTION+2), (slV_00* 1+slV_01* 3 ) >>(DISTORTION+2) );
        // texture row 1
        pulTexture[_pixTexWidth*1+0] = PIXEL( (slU_00* 3 +slU_10* 1 ) >>(DISTORTION+2), (slV_00* 3 +slV_10 ) >>(DISTORTION+2) );
        pulTexture[_pixTexWidth*1+1] = PIXEL( (slU_00* 9+slU_01* 3+slU_10* 3+slU_11* 1) >>(DISTORTION+4), (slV_00* 9+slV_01* 3+slV_10* 3+slV_11* 1) >>(DISTORTION+4) );
        pulTexture[_pixTexWidth*1+2] = PIXEL( (slU_00* 3+slU_01* 3+slU_10* 1+slU_11* 1) >>(DISTORTION+3), (slV_00* 3+slV_01* 3+slV_10* 1+slV_11* 1) >>(DISTORTION+3) );
        pulTexture[_pixTexWidth*1+3] = PIXEL( (slU_00* 3+slU_01* 9+slU_10* 1+slU_11* 3) >>(DISTORTION+4), (slV_00* 3+slV_01* 9+slV_10* 1+slV_11* 3) >>(DISTORTION+4) );
        // texture row 2
        pulTexture[_pixTexWidth*2+0] = PIXEL( (slU_00 +slU_10 ) >>(DISTORTION+1), (slV_00 +slV_10 ) >>(DISTORTION+1) );
        pulTexture[_pixTexWidth*2+1] = PIXEL( (slU_00* 3+slU_01* 1+slU_10* 3+slU_11* 1) >>(DISTORTION+3), (slV_00* 3+slV_01* 1+slV_10* 3+slV_11* 1) >>(DISTORTION+3) );
        pulTexture[_pixTexWidth*2+2] = PIXEL( (slU_00 +slU_01 +slU_10 +slU_11 ) >>(DISTORTION+2), (slV_00 +slV_01 +slV_10 +slV_11 ) >>(DISTORTION+2) );
        pulTexture[_pixTexWidth*2+3] = PIXEL( (slU_00* 1+slU_01* 3+slU_10* 1+slU_11* 3) >>(DISTORTION+3), (slV_00* 1+slV_01* 3+slV_10* 1+slV_11* 3) >>(DISTORTION+3) );
        // texture row 3
        pulTexture[_pixTexWidth*3+0] = PIXEL( (slU_00* 1 +slU_10* 3 ) >>(DISTORTION+2), (slV_00* 1 +slV_10* 3 ) >>(DISTORTION+2) );
        pulTexture[_pixTexWidth*3+1] = PIXEL( (slU_00* 3+slU_01* 1+slU_10* 9+slU_11* 3) >>(DISTORTION+4), (slV_00* 3+slV_01* 1+slV_10* 9+slV_11* 3) >>(DISTORTION+4) );
        pulTexture[_pixTexWidth*3+2] = PIXEL( (slU_00* 1+slU_01* 1+slU_10* 3+slU_11* 3) >>(DISTORTION+3), (slV_00* 1+slV_01* 1+slV_10* 3+slV_11* 3) >>(DISTORTION+3) );
        pulTexture[_pixTexWidth*3+3] = PIXEL( (slU_00* 1+slU_01* 3+slU_10* 3+slU_11* 9) >>(DISTORTION+4), (slV_00* 1+slV_01* 3+slV_10* 3+slV_11* 9) >>(DISTORTION+4) );
        // advance to next texel
        pulTexture+=4;
        pswHeightMap++;
      }
      // skip the three additional texture rows written together with the first
      pulTexture+=_pixTexWidth*3;
    }

#endif
  }
  else
  { // DO NOTHING
    ASSERTALWAYS( "Effect textures larger than 256 pixels aren't supported");
  }

  _sfStats.StopTimer(CStatForm::STI_EFFECTRENDER);
}
#pragma warning(default: 4731)
///////////////// Fire
void InitializeFire(void)
{
Randomize( (ULONG)(_pTimer->GetHighPrecisionTimer().GetMilliseconds()));
}
// Plasma animation variants understood by AnimatePlasma().
enum PlasmaType {
  ptNormal   = 0,  // texels are updated in place, borders wrap around
  ptUp       = 1,  // results are stored one row up, border rows are not processed
  ptUpTile   = 2,  // as ptUp, but border rows are processed with wrap-around
  ptDown     = 3,  // results are stored one row down, border rows are not processed
  ptDownTile = 4   // as ptDown, but border rows are processed with wrap-around
};
/*******************************
Plasma Animation
********************************/
static void AnimatePlasma( SLONG slDensity, PlasmaType eType)
{
_sfStats.StartTimer(CStatForm::STI_EFFECTRENDER);
/////////////////////////////////// move plasma
UBYTE *pNew = (UBYTE*)_ptdEffect->td_pubBuffer1;
UBYTE *pOld = (UBYTE*)_ptdEffect->td_pubBuffer2;
PIX pixV, pixU;
PIX pixOffset;
SLONG slLineAbove, slLineBelow, slLineLeft, slLineRight;
ULONG ulNew;
// --------------------------
// Normal plasma
// --------------------------
if (eType == ptNormal) {
// inner rectangle (without 1 pixel border)
pixOffset = _pixBufferWidth;
for( pixV=1; pixV<_pixBufferHeight-1; pixV++) {
for( pixU=0; pixU<_pixBufferWidth; pixU++) {
ulNew = ((((ULONG)pOld[pixOffset - _pixBufferWidth] +
(ULONG)pOld[pixOffset + _pixBufferWidth] +
(ULONG)pOld[pixOffset - 1] +
(ULONG)pOld[pixOffset + 1]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[pixOffset] = ulNew - (ulNew >> slDensity);
pixOffset++;
}
}
// upper horizontal border (without corners)
slLineAbove = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
slLineBelow = _pixBufferWidth + 1;
slLineLeft = 0;
slLineRight = 2;
pixOffset = 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
ulNew = ((((ULONG)pOld[slLineAbove] +
(ULONG)pOld[slLineBelow] +
(ULONG)pOld[slLineLeft] +
(ULONG)pOld[slLineRight]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[pixOffset] = ulNew - (ulNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// lower horizontal border (without corners)
slLineAbove = ((_pixBufferHeight-2)*_pixBufferWidth) + 1;
slLineBelow = 1;
slLineLeft = (_pixBufferHeight-1)*_pixBufferWidth;
slLineRight = ((_pixBufferHeight-1)*_pixBufferWidth) + 2;
pixOffset = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
ulNew = ((((ULONG)pOld[slLineAbove] +
(ULONG)pOld[slLineBelow] +
(ULONG)pOld[slLineLeft] +
(ULONG)pOld[slLineRight]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[pixOffset] = ulNew - (ulNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// corner ( 0, 0)
ulNew = ((((ULONG)pOld[_pixBufferWidth] +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] +
(ULONG)pOld[1] +
(ULONG)pOld[_pixBufferWidth-1]
)>>2) +
(ULONG)pOld[0]
)>>1;
pNew[0] = ulNew - (ulNew >> slDensity);
// corner ( 0, _pixBufferWidth)
ulNew = ((((ULONG)pOld[(2*_pixBufferWidth) - 1] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] +
(ULONG)pOld[0] +
(ULONG)pOld[_pixBufferWidth-2]
)>>2) +
(ULONG)pOld[_pixBufferWidth-1]
)>>1;
pNew[_pixBufferWidth-1] = ulNew - (ulNew >> slDensity);
// corner ( _pixBufferHeight, 0)
ulNew = ((((ULONG)pOld[0] +
(ULONG)pOld[(_pixBufferHeight-2)*_pixBufferWidth] +
(ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) + 1] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
)>>2) +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth]
)>>1;
pNew[(_pixBufferHeight-1)*_pixBufferWidth] = ulNew - (ulNew >> slDensity);
// corner ( _pixBufferHeight, _pixBufferWidth)
ulNew = ((((ULONG)pOld[_pixBufferWidth-1] +
(ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) - 1] +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 2]
)>>2) +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
)>>1;
pNew[(_pixBufferHeight*_pixBufferWidth) - 1] = ulNew - (ulNew >> slDensity);
// --------------------------
// Plasma going up
// --------------------------
} else if (eType==ptUp || eType==ptUpTile) {
// inner rectangle (without 1 pixel border)
pixOffset = _pixBufferWidth;
for( pixV=1; pixV<_pixBufferHeight-1; pixV++) {
for( pixU=0; pixU<_pixBufferWidth; pixU++) {
ulNew = ((((ULONG)pOld[pixOffset - _pixBufferWidth] +
(ULONG)pOld[pixOffset + _pixBufferWidth] +
(ULONG)pOld[pixOffset - 1] +
(ULONG)pOld[pixOffset + 1]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[pixOffset-_pixBufferWidth] = ulNew - (ulNew >> slDensity);
pixOffset++;
}
}
// tile
if (eType==ptUpTile) {
// upper horizontal border (without corners)
slLineAbove = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
slLineBelow = _pixBufferWidth + 1;
slLineLeft = 0;
slLineRight = 2;
pixOffset = 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
ulNew = ((((ULONG)pOld[slLineAbove] +
(ULONG)pOld[slLineBelow] +
(ULONG)pOld[slLineLeft] +
(ULONG)pOld[slLineRight]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[slLineAbove] = ulNew - (ulNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// lower horizontal border (without corners)
slLineAbove = ((_pixBufferHeight-2)*_pixBufferWidth) + 1;
slLineBelow = 1;
slLineLeft = (_pixBufferHeight-1)*_pixBufferWidth;
slLineRight = ((_pixBufferHeight-1)*_pixBufferWidth) + 2;
pixOffset = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
ulNew = ((((ULONG)pOld[slLineAbove] +
(ULONG)pOld[slLineBelow] +
(ULONG)pOld[slLineLeft] +
(ULONG)pOld[slLineRight]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[slLineAbove] = ulNew - (ulNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// corner ( 0, 0)
ulNew = ((((ULONG)pOld[_pixBufferWidth] +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] +
(ULONG)pOld[1] +
(ULONG)pOld[_pixBufferWidth-1]
)>>2) +
(ULONG)pOld[0]
)>>1;
pNew[(_pixBufferHeight-1)*_pixBufferWidth] = ulNew - (ulNew >> slDensity);
// corner ( 0, _pixBufferWidth)
ulNew = ((((ULONG)pOld[(2*_pixBufferWidth) - 1] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] +
(ULONG)pOld[0] +
(ULONG)pOld[_pixBufferWidth-2]
)>>2) +
(ULONG)pOld[_pixBufferWidth-1]
)>>1;
pNew[(_pixBufferHeight*_pixBufferWidth) - 1] = ulNew - (ulNew >> slDensity);
// corner ( _pixBufferHeight, 0)
ulNew = ((((ULONG)pOld[0] +
(ULONG)pOld[(_pixBufferHeight-2)*_pixBufferWidth] +
(ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) + 1] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
)>>2) +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth]
)>>1;
pNew[(_pixBufferHeight-2)*_pixBufferWidth] = ulNew - (ulNew >> slDensity);
// corner ( _pixBufferHeight, _pixBufferWidth)
ulNew = ((((ULONG)pOld[_pixBufferWidth-1] +
(ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) - 1] +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 2]
)>>2) +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
)>>1;
pNew[((_pixBufferHeight-1)*_pixBufferWidth) - 1] = ulNew - (ulNew >> slDensity);
}
// --------------------------
// Plasma going down
// --------------------------
} else if (eType==ptDown || eType==ptDownTile) {
// inner rectangle (without 1 pixel border)
pixOffset = _pixBufferWidth;
for( pixV=1; pixV<_pixBufferHeight-1; pixV++) {
for( pixU=0; pixU<_pixBufferWidth; pixU++) {
ulNew = ((((ULONG)pOld[pixOffset - _pixBufferWidth] +
(ULONG)pOld[pixOffset + _pixBufferWidth] +
(ULONG)pOld[pixOffset - 1] +
(ULONG)pOld[pixOffset + 1]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[pixOffset+_pixBufferWidth] = ulNew - (ulNew >> slDensity);
pixOffset++;
}
}
// tile
if (eType==ptDownTile) {
// upper horizontal border (without corners)
slLineAbove = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
slLineBelow = _pixBufferWidth + 1;
slLineLeft = 0;
slLineRight = 2;
pixOffset = 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
ulNew = ((((ULONG)pOld[slLineAbove] +
(ULONG)pOld[slLineBelow] +
(ULONG)pOld[slLineLeft] +
(ULONG)pOld[slLineRight]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[slLineBelow] = ulNew - (ulNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// lower horizontal border (without corners)
slLineAbove = ((_pixBufferHeight-2)*_pixBufferWidth) + 1;
slLineBelow = 1;
slLineLeft = (_pixBufferHeight-1)*_pixBufferWidth;
slLineRight = ((_pixBufferHeight-1)*_pixBufferWidth) + 2;
pixOffset = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
ulNew = ((((ULONG)pOld[slLineAbove] +
(ULONG)pOld[slLineBelow] +
(ULONG)pOld[slLineLeft] +
(ULONG)pOld[slLineRight]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[slLineBelow] = ulNew - (ulNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// corner ( 0, 0)
ulNew = ((((ULONG)pOld[_pixBufferWidth] +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] +
(ULONG)pOld[1] +
(ULONG)pOld[_pixBufferWidth-1]
)>>2) +
(ULONG)pOld[0]
)>>1;
pNew[_pixBufferWidth] = ulNew - (ulNew >> slDensity);
// corner ( 0, _pixBufferWidth)
ulNew = ((((ULONG)pOld[(2*_pixBufferWidth) - 1] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] +
(ULONG)pOld[0] +
(ULONG)pOld[_pixBufferWidth-2]
)>>2) +
(ULONG)pOld[_pixBufferWidth-1]
)>>1;
pNew[(2*_pixBufferWidth) - 1] = ulNew - (ulNew >> slDensity);
// corner ( _pixBufferHeight, 0)
ulNew = ((((ULONG)pOld[0] +
(ULONG)pOld[(_pixBufferHeight-2)*_pixBufferWidth] +
(ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) + 1] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
)>>2) +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth]
)>>1;
pNew[0] = ulNew - (ulNew >> slDensity);
// corner ( _pixBufferHeight, _pixBufferWidth)
ulNew = ((((ULONG)pOld[_pixBufferWidth-1] +
(ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) - 1] +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 2]
)>>2) +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
)>>1;
pNew[_pixBufferWidth-1] = ulNew - (ulNew >> slDensity);
}
}
// swap buffers
Swap( _ptdEffect->td_pubBuffer1, _ptdEffect->td_pubBuffer2);
_sfStats.StopTimer(CStatForm::STI_EFFECTRENDER);
}
/*******************************
Fire Animation
********************************/
// Advances the fire heat map by one step, working in place on td_pubBuffer2.
// Columns are processed one after another; inside a column the walk starts at
// row 0 and covers (_pixBufferHeight-2) cells. For each cell the two cells
// directly below it (offsets +width and +2*width) are averaged:
//  - if the average is not greater than slDensity, the cell itself is set to 0;
//  - otherwise a random amount (random value AND slDensity) is subtracted and the
//    result is stored at (cell offset + asbMod3Sub1Table[random amount]), masked
//    with slBufferMask. The table is filled with (i%3)-1 by
//    CTextureEffectGlobal::Animate(), so the store lands on the cell or on one of
//    its immediate neighbours in memory.
// slDensity therefore acts both as the extinguish threshold and as the bit mask
// limiting how much heat is lost per step.
// The three implementations below (MSVC asm, GNU asm, portable C) are selected by ASMOPT.
static void AnimateFire( SLONG slDensity)
{
// _sfStats.StartTimer(CStatForm::STI_EFFECTRENDER);
/////////////////////////////////// move fire
// use only one buffer (otherwise it's not working)
UBYTE *pubNew = (UBYTE*)_ptdEffect->td_pubBuffer2;
// mask applied to the diffused store position
// (NOTE(review): only wraps correctly if width*height is a power of two - confirm)
SLONG slBufferMask = _pixBufferWidth*_pixBufferHeight -1;
// subtracted from the running offset after a column has been walked, which
// leaves the offset at row 0 of the next column (used by the asm versions only)
SLONG slColumnModulo = _pixBufferWidth*(_pixBufferHeight-2) -1;
#if ASMOPT == 1
#if (defined __MSVC_INLINE__)
// register use: EDI = random seed, ESI = buffer, EBX = current cell offset,
// ECX = remaining cells in the column. The seed is advanced (imul by 262147)
// only when a cell takes the doCalc path, and written back at the end.
__asm {
push ebx
mov edi,D [ulRNDSeed] ;// EDI = randomizer
mov esi,D [pubNew]
xor ebx,ebx
colLoopFM:
mov ecx,D [_pixBufferHeight]
sub ecx,2
rowLoopFM:
mov edx,D [_pixBufferWidth]
add edx,esi
movzx eax,B [ebx+ edx]
add edx,D [_pixBufferWidth]
movzx edx,B [ebx+ edx]
add eax,edx
shr eax,1
cmp eax,D [slDensity]
jg doCalc
mov B [esi+ebx],0
jmp pixDone
doCalc:
mov edx,edi
sar edx,16
and edx,D [slDensity]
sub eax,edx
movsx edx,B [asbMod3Sub1Table +edx]
add edx,ebx
and edx,D [slBufferMask]
mov B [esi+edx],al
imul edi,262147
pixDone:
// advance to next row
add ebx,D [_pixBufferWidth]
dec ecx
jnz rowLoopFM
// advance to next column
sub ebx,D [slColumnModulo]
cmp ebx,D [_pixBufferWidth]
jl colLoopFM
// all done
mov D [ulRNDSeed],edi
pop ebx
}
#elif (defined __GNU_INLINE__)
// Same algorithm in AT&T syntax. Inputs arrive in registers (see the operand
// list at the end): eax=slDensity, ecx=slBufferMask, edx=slColumnModulo,
// edi=ulRNDSeed, esi=pubNew. They are spilled to the stack so the registers can
// be reused; after the pushes: (%esp)=slDensity, 4(%esp)=slBufferMask,
// 8(%esp)=slColumnModulo, 12(%esp)=saved ebx.
// NOTE(review): eax, ecx, edx and edi are overwritten inside the asm although
// they are declared as input-only operands and not listed as clobbers - confirm
// against GCC's extended-asm rules for the compilers in use.
// NOTE(review): doCalc_animateFire / pixDone_animateFire are named (non-local)
// labels; they would be defined twice if the compiler ever emitted this asm
// more than once - confirm.
__asm__ __volatile__ (
"pushl %%ebx \n\t" // GCC's register.
"xorl %%ebx, %%ebx \n\t"
"pushl %%edx \n\t" // slColumnModulo
"pushl %%ecx \n\t" // slBufferMask
"pushl %%eax \n\t" // slDensity
"0: \n\t" // colLoopFM
"movl (" ASMSYM(_pixBufferHeight) "), %%ecx \n\t"
"subl $2, %%ecx \n\t"
"1: \n\t" // rowLoopFM
"movl (" ASMSYM(_pixBufferWidth) "), %%edx \n\t"
"addl %%esi, %%edx \n\t"
"movzbl (%%ebx, %%edx), %%eax \n\t"
"addl (" ASMSYM(_pixBufferWidth) "), %%edx \n\t"
"movzbl (%%ebx, %%edx), %%edx \n\t"
"addl %%edx, %%eax \n\t"
"shrl $1, %%eax \n\t"
"cmpl (%%esp), %%eax \n\t"
"jg doCalc_animateFire \n\t"
"movb $0, (%%esi, %%ebx) \n\t"
"jmp pixDone_animateFire \n\t"
"doCalc_animateFire: \n\t"
"movl %%edi, %%edx \n\t"
"sarl $16, %%edx \n\t"
"andl (%%esp), %%edx \n\t"
"subl %%edx, %%eax \n\t"
"movsbl " ASMSYM(asbMod3Sub1Table) "(%%edx), %%edx \n\t"
"addl %%ebx, %%edx \n\t"
"andl 4(%%esp), %%edx \n\t" // slBufferMask
"movb %%al, (%%esi, %%edx) \n\t"
"imull $262147, %%edi \n\t"
"pixDone_animateFire: \n\t"
// advance to next row
"addl (" ASMSYM(_pixBufferWidth) "), %%ebx \n\t"
"decl %%ecx \n\t"
"jnz 1b \n\t" // rowLoopFM
// advance to next column
"subl 8(%%esp), %%ebx \n\t" // slColumnModulo
"cmpl (" ASMSYM(_pixBufferWidth) "), %%ebx \n\t"
"jl 0b \n\t" // colLoopFM
// all done
"movl %%edi, (" ASMSYM(ulRNDSeed) ") \n\t"
"addl $12, %%esp \n\t" // lose our locals.
"popl %%ebx \n\t" // Restore GCC's var.
: // no outputs.
: "a" (slDensity), "c" (slBufferMask),
"d" (slColumnModulo), "D" (ulRNDSeed), "S" (pubNew)
: "cc", "memory"
);
#else
#error fill in for you platform.
#endif
#else
// portable C version
// inner rectangle (without 1 pixel border)
// (the written cell offset starts at row 0 of each column; the cells read are
//  one and two rows below it)
for( PIX pixU=0; pixU<_pixBufferWidth; pixU++)
{
SLONG slOffset = pixU;
for( PIX pixV=1; pixV<_pixBufferHeight-1; pixV++)
{
// average of the two cells below the current one
ULONG ulNew = ((ULONG)pubNew[_pixBufferWidth+slOffset] + (ULONG)pubNew[_pixBufferWidth*2+slOffset]) >>1;
// NOTE(review): this compares a ULONG with an SLONG while the asm versions use
// a signed compare (jg); presumably identical for non-negative slDensity - confirm
if( ulNew>slDensity) {
// cool down by a random amount limited by the density mask
ULONG ulNewDensity = RNDW&slDensity;
ulNew -= ulNewDensity;
// sideways drift of -1, 0 or +1 taken from the lookup table
SLONG slDifusion = (SLONG)asbMod3Sub1Table[ulNewDensity]; // (SLONG)(ulNewDensity%3-1);
SLONG slPos = (slDifusion+slOffset) & slBufferMask;
pubNew[slPos] = ulNew;
} else {
// too cold - extinguish this cell
pubNew[slOffset] = 0;
}
// one row down
slOffset += _pixBufferWidth;
}
}
#endif
// _sfStats.StopTimer(CStatForm::STI_EFFECTRENDER);
}
//////////////////////////// displace texture
// File-scope copy of the heat map pointer; the GNU inline asm below loads it by
// symbol because no register is left to pass it in as an operand.
static UBYTE *_pubHeat_RenderPlasmaFire = NULL;
// Converts the plasma/fire heat map (td_pubBuffer2 of the effect texture) into
// pixels of the effect texture (td_pulFrames): every sampled heat byte is
// shifted right by slBaseMipShift and used as an index into the base texture's
// td_pulFrames, and the ULONG found there is copied to the output.
// _pixTexWidth x _pixTexHeight pixels are produced; the heat map is sampled
// every slHeatMapStep bytes within a row, and slHeatRowStep is added after each
// output row to skip the heat-map rows that are not sampled.
// The three implementations below (MSVC asm, GNU asm, portable C) are selected by ASMOPT.
static void RenderPlasmaFire(void)
{
// _sfStats.StartTimer(CStatForm::STI_EFFECTRENDER);
// get and adjust textures' parameters
PIX pixBaseWidth = _ptdBase->GetPixWidth();
ULONG *pulTextureBase = _ptdBase->td_pulFrames;
ULONG *pulTexture = _ptdEffect->td_pulFrames;
ASSERT( _ptdEffect->td_pulFrames!=NULL && _ptdBase->td_pulFrames!=NULL && pixBaseWidth<=256);
UBYTE *pubHeat = (UBYTE*)_ptdEffect->td_pubBuffer2; // heat map pointer
// horizontal sampling step through the heat map
// (NOTE(review): assumes _pixTexWidth is non-zero and not larger than the buffer width - confirm callers)
SLONG slHeatMapStep = _pixBufferWidth/_pixTexWidth;
// extra advance after each output row: skips (slHeatMapStep-1) heat-map rows
SLONG slHeatRowStep = (slHeatMapStep-1)*_pixBufferWidth;
// scales an 8-bit heat value down to an index into the base texture
// (presumably FastLog2 returns log2 of a power-of-two width - confirm)
SLONG slBaseMipShift = 8 - FastLog2(pixBaseWidth);
#if ASMOPT == 1
#if (defined __MSVC_INLINE__)
// register use: EBX = heat map pointer, ESI = base texture, EDI = output,
// ECX = row counter (saved on the stack while CL holds the shift), EDX = pixel counter
__asm {
push ebx
mov ebx,D [pubHeat]
mov esi,D [pulTextureBase]
mov edi,D [pulTexture]
mov ecx,D [_pixTexHeight]
rowLoopF:
push ecx
mov edx,D [_pixTexWidth]
mov ecx,D [slBaseMipShift]
pixLoopF:
movzx eax,B [ebx]
shr eax,cl
mov eax,D [esi+ eax*4]
mov D [edi],eax
// advance to next pixel
add ebx,D [slHeatMapStep]
add edi,4
dec edx
jnz pixLoopF
// advance to next row
pop ecx
add ebx,D [slHeatRowStep]
dec ecx
jnz rowLoopF
pop ebx
}
#elif (defined __GNU_INLINE__)
_pubHeat_RenderPlasmaFire = pubHeat; // ran out of registers. :/
// Same algorithm in AT&T syntax. Inputs: esi=pulTextureBase, edi=pulTexture,
// ecx=slBaseMipShift, eax=slHeatRowStep, edx=slHeatMapStep. After the three
// spills below: (%esp)=slBaseMipShift, 4(%esp)=slHeatMapStep, 8(%esp)=slHeatRowStep.
// Inside a row the row counter is pushed on top, so every offset grows by 4:
// that is why 8(%esp) means slHeatMapStep inside the pixel loop but
// slHeatRowStep after the row counter has been popped again.
// NOTE(review): eax, ecx, edx and edi are overwritten inside the asm although
// they are declared as input-only operands and not listed as clobbers - confirm
// against GCC's extended-asm rules for the compilers in use.
__asm__ __volatile__ (
"pushl %%ebx \n\t"
"movl (" ASMSYM(_pubHeat_RenderPlasmaFire) "),%%ebx \n\t"
"pushl %%eax \n\t" // slHeatRowStep
"pushl %%edx \n\t" // slHeatMapStep
"pushl %%ecx \n\t" // slBaseMipShift
"movl (" ASMSYM(_pixTexHeight) "), %%ecx \n\t"
"0: \n\t" // rowLoopF
"pushl %%ecx \n\t"
"movl (" ASMSYM(_pixTexWidth) "), %%edx \n\t"
"movl 4(%%esp), %%ecx \n\t" // slBaseMipShift
"1: \n\t" // pixLoopF
"movzbl (%%ebx), %%eax \n\t"
"shrl %%cl, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, (%%edi) \n\t"
// advance to next pixel
"addl 8(%%esp), %%ebx \n\t" // slHeatMapStep
"addl $4, %%edi \n\t"
"decl %%edx \n\t"
"jnz 1b \n\t" // pixLoopF
// advance to next row
"popl %%ecx \n\t"
"addl 8(%%esp), %%ebx \n\t" // slHeatRowStep
"decl %%ecx \n\t"
"jnz 0b \n\t" // rowLoopF
"addl $12, %%esp \n\t" // lose our locals.
"popl %%ebx \n\t" // restore GCC's register.
: // no outputs.
: "S" (pulTextureBase), "D" (pulTexture),
"c" (slBaseMipShift), "a" (slHeatRowStep), "d" (slHeatMapStep)
: "cc", "memory"
);
#else
#error fill in for you platform.
#endif
#else
// portable C version
INDEX iPalette;
for( INDEX pixV=0; pixV<_pixTexHeight; pixV++) {
// for every pixel in horizontal line
for( INDEX pixU=0; pixU<_pixTexWidth; pixU++) {
// heat value scaled down to an index into the base texture
iPalette = (*pubHeat)>>slBaseMipShift;
*pulTexture++ = pulTextureBase[iPalette];
pubHeat += slHeatMapStep;
}
// skip the heat-map rows that are not sampled
pubHeat += slHeatRowStep;
}
#endif
// _sfStats.StopTimer(CStatForm::STI_EFFECTRENDER);
}
/////////////////////////////////////////////////////////////////////
// EFFECT TABLES
/////////////////////////////////////////////////////////////////////
// Effect sources offered by the water presets. Each row holds the source's
// name followed by its Initialize* and Animate* functions.
struct TextureEffectSourceType atestWater[] = {
  { "Raindrops",       InitializeRaindropsStandard, AnimateRaindropsStandard },
  { "RaindropsBig",    InitializeRaindropsBig,      AnimateRaindropsBig      },
  { "RaindropsSmall",  InitializeRaindropsSmall,    AnimateRaindropsSmall    },
  { "Random Surfer",   InitializeRandomSurfer,      AnimateRandomSurfer      },
  { "Oscilator",       InitializeOscilator,         AnimateOscilator         },
  { "Vertical Line",   InitializeVertLine,          AnimateVertLine          },
  { "Horizontal Line", InitializeHortLine,          AnimateHortLine          },
};
// Effect sources offered by the plasma and fire presets. Each row holds the
// source's name followed by its Initialize* and Animate* functions.
struct TextureEffectSourceType atestFire[] = {
  { "Point",          InitializeFirePoint,         AnimateFirePoint         },
  { "Random Point",   InitializeRandomFirePoint,   AnimateRandomFirePoint   },
  { "Shake Point",    InitializeFireShakePoint,    AnimateFireShakePoint    },
  { "Fire Place",     InitializeFirePlace,         AnimateFirePlace         },
  { "Roler",          InitializeFireRoler,         AnimateFireRoler         },
  { "Fall",           InitializeFireFall,          AnimateFireFall          },
  { "Fountain",       InitializeFireFountain,      AnimateFireFountain      },
  { "Side Fountain",  InitializeFireSideFountain,  AnimateFireSideFountain  },
  { "Lightning",      InitializeFireLightning,     AnimateFireLightning     },
  { "Lightning Ball", InitializeFireLightningBall, AnimateFireLightningBall },
  { "Smoke",          InitializeFireSmoke,         AnimateFireSmoke         },
};
// Parameterless adapters that bind fixed arguments to the animation routines,
// so they can be stored as the animate function of a global preset.
inline void AWaterFast(void)
{
  AnimateWater(2);
}

inline void AWaterMedium(void)
{
  AnimateWater(3);
}

inline void AWaterSlow(void)
{
  AnimateWater(5);
}

inline void APlasma(void)
{
  AnimatePlasma(4, ptNormal);
}

inline void APlasmaUp(void)
{
  AnimatePlasma(4, ptUp);
}

inline void APlasmaUpTile(void)
{
  AnimatePlasma(4, ptUpTile);
}

inline void APlasmaDown(void)
{
  AnimatePlasma(5, ptDown);
}

inline void APlasmaDownTile(void)
{
  AnimatePlasma(5, ptDownTile);
}

inline void APlasmaUpSlow(void)
{
  AnimatePlasma(6, ptUp);
}

inline void AFire(void)
{
  AnimateFire(15);
}
// Global effect presets, indexed by CTextureEffectGlobal::teg_ulEffectType.
// Each row: name, initialize function, animate function, number of available
// effect sources, and the table of those sources.
// NOTE(review): the fourth row has an empty name and repeats "Water Slow";
// presumably a placeholder that keeps the indices of the following presets
// stable - confirm before removing or reordering anything.
struct TextureEffectGlobalType _ategtTextureEffectGlobalPresets[] = {
  { "Water Fast",       InitializeWater, AWaterFast,      sizeof(atestWater)/sizeof(atestWater[0]), atestWater },
  { "Water Medium",     InitializeWater, AWaterMedium,    sizeof(atestWater)/sizeof(atestWater[0]), atestWater },
  { "Water Slow",       InitializeWater, AWaterSlow,      sizeof(atestWater)/sizeof(atestWater[0]), atestWater },
  { "",                 InitializeWater, AWaterSlow,      sizeof(atestWater)/sizeof(atestWater[0]), atestWater },
  { "Plasma Tile",      InitializeFire,  APlasma,         sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
  { "Plasma Up",        InitializeFire,  APlasmaUp,       sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
  { "Plasma Up Tile",   InitializeFire,  APlasmaUpTile,   sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
  { "Plasma Down",      InitializeFire,  APlasmaDown,     sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
  { "Plasma Down Tile", InitializeFire,  APlasmaDownTile, sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
  { "Plasma Up Slow",   InitializeFire,  APlasmaUpSlow,   sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
  { "Fire",             InitializeFire,  AFire,           sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
};
// number of entries in the global presets table
INDEX _ctTextureEffectGlobalPresets =
  sizeof(_ategtTextureEffectGlobalPresets) / sizeof(*_ategtTextureEffectGlobalPresets);
// get effect type (TRUE if water type effect, FALSE if plasma or fire effect);
// a preset counts as water when its initialization routine is InitializeWater
BOOL CTextureEffectGlobal::IsWater(void)
{
  const TextureEffectGlobalType &tegtPreset = _ategtTextureEffectGlobalPresets[teg_ulEffectType];
  return tegtPreset.tegt_Initialize == InitializeWater;
}
// constructor - binds the global effect to its texture and chosen preset
//   ptdTexture     - texture data this effect belongs to (kept for cross linking)
//   ulGlobalEffect - index into the global presets table
CTextureEffectGlobal::CTextureEffectGlobal(CTextureData *ptdTexture, ULONG ulGlobalEffect)
{
  teg_ptdTexture   = ptdTexture;
  teg_ulEffectType = ulGlobalEffect;

  // run the preset's initialization routine
  const TextureEffectGlobalType &tegtPreset = _ategtTextureEffectGlobalPresets[teg_ulEffectType];
  tegtPreset.tegt_Initialize();

  // force the texture to be updated the next time it is used
  teg_updTexture.Invalidate();
}
// add new effect source: creates one more source in this effect's source array
// and initializes it with the given type and the two points (U0,V0)-(U1,V1)
void CTextureEffectGlobal::AddEffectSource( ULONG ulEffectSourceType, PIX pixU0, PIX pixV0,
                                            PIX pixU1, PIX pixV1)
{
  CTextureEffectSource &tesCreated = *teg_atesEffectSources.New(1);
  tesCreated.Initialize( this, ulEffectSourceType, pixU0, pixV0, pixU1, pixV1);
}
// animate effect texture
void CTextureEffectGlobal::Animate(void)
{
// if not set yet (funny word construction:)
if( !bTableSet) {
// set table for fast modulo 3 minus 1
for( INDEX i=0; i<256; i++) asbMod3Sub1Table[i]=(SBYTE)((i%3)-1);
bTableSet = TRUE;
}
// setup some internal vars
_ptdEffect = teg_ptdTexture;
_pixBufferWidth = _ptdEffect->td_pixBufferWidth;
_pixBufferHeight = _ptdEffect->td_pixBufferHeight;
_ulBufferMask = _pixBufferHeight*_pixBufferWidth -1;
// remember buffer pointers
_pubDrawBuffer=(UBYTE*)_ptdEffect->td_pubBuffer2;
_pswDrawBuffer=(SWORD*)_ptdEffect->td_pubBuffer2;
// for each effect source
FOREACHINDYNAMICARRAY( teg_atesEffectSources, CTextureEffectSource, itEffectSource) {
// let it animate itself
itEffectSource->Animate();
}
// use animation function for this global effect type
_ategtTextureEffectGlobalPresets[teg_ulEffectType].tegt_Animate();
// remember that it was calculated
teg_updTexture.MarkUpdated();
}
#pragma warning(disable: 4731)
// render effect texture
//   iWantedMipLevel          - mip level to render
//   pixTexWidth/pixTexHeight - output size; used by the water renderer only, the
//                              plasma & fire renderer derives its size from the
//                              effect texture and the wanted mip level
void CTextureEffectGlobal::Render( INDEX iWantedMipLevel, PIX pixTexWidth, PIX pixTexHeight)
{
  // publish this effect's textures and buffer layout to the file-level state
  _ptdEffect       = teg_ptdTexture;
  _ptdBase         = teg_ptdTexture->td_ptdBaseTexture;
  _pixBufferWidth  = _ptdEffect->td_pixBufferWidth;
  _pixBufferHeight = _ptdEffect->td_pixBufferHeight;

  if( !IsWater()) {
    // plasma & fire rendering routine
    _pixTexWidth  = _ptdEffect->GetWidth()  >>iWantedMipLevel;
    _pixTexHeight = _ptdEffect->GetHeight() >>iWantedMipLevel;
    RenderPlasmaFire();
    return;
  }

  // water rendering routine
  _pixTexWidth     = pixTexWidth;
  _pixTexHeight    = pixTexHeight;
  _iWantedMipLevel = iWantedMipLevel;
  RenderWater();
}
#pragma warning(default: 4731)
// returns the elapsed time, in seconds, recorded by the effect-render
// statistics timer (STI_EFFECTRENDER)
DOUBLE CTextureEffectGlobal::GetRenderingTime(void)
{
  return _sfStats.sf_astTimers[CStatForm::STI_EFFECTRENDER].st_tvElapsed.GetSeconds();
}