Serious-Engine/Sources/Engine/Graphics/TextureEffects.cpp
Ryan C. Gordon ee754e7edf Made static variables that inline asm needs to touch non-static.
I dislike having to do this, but Clang sees them as unused and removes them
from the object file, causing linking to fail.

The real solution here is to remove all the assembly code because it's 2016
and this game doesn't have to run on 133MHz Pentium now.  :)
2016-03-31 13:04:57 -04:00

3438 lines
114 KiB
C++

/* Copyright (c) 2002-2012 Croteam Ltd. All rights reserved. */
#include "Engine/StdH.h"
#include <Engine/Graphics/Texture.h>
#include <Engine/Graphics/TextureEffects.h>
#include <Engine/Math/Functions.h>
#include <Engine/Base/Timer.h>
#include <Engine/Base/Statistics_Internal.h>
#include <Engine/Templates/DynamicArray.cpp>
#include <Engine/Templates/Stock_CTextureData.h>
#include <Engine/Templates/StaticArray.cpp>
// asm shortcuts
// One-letter aliases for Intel-syntax operand keywords (offset / size
// specifiers), so inline assembly operands can be written compactly.
#define O offset
#define Q qword ptr
#define D dword ptr
#define W word ptr
#define B byte ptr
// ASMOPT is 1 when an inline-assembly dialect (__MSVC_INLINE__ or
// __GNU_INLINE__) is enabled and USE_PORTABLE_C is not defined, 0 otherwise.
// NOTE(review): presumably tested later in the file to choose between asm and
// portable C++ code paths - the users are not visible in this section.
#if (defined USE_PORTABLE_C)
#define ASMOPT 0
#elif (defined __MSVC_INLINE__)
#define ASMOPT 1
#elif (defined __GNU_INLINE__)
#define ASMOPT 1
#else
#define ASMOPT 0
#endif
// 64-bit scratch variables shared with the rendering code. They have external
// linkage (non-static) on purpose; the comment below explains why the
// optimizer must not be allowed to fold or drop them.
__int64 mmBaseWidthShift=0;
__int64 mmBaseWidth=0;
__int64 mmBaseWidthMask=0;
__int64 mmBaseHeightMask=0;
__int64 mmBaseMasks=0;
__int64 mmShift=0;
#if (defined __GNUC__)
/*
* If these are "const" vars, they get optimized to hardcoded values when gcc
* builds with optimization, which means the linker can't resolve the
* references to them in the inline ASM. That's obnoxious.
*/
// 64-bit constants: two packed 32-bit lanes (mm1*) or four 16-bit lanes (mm0*).
__int64 mm1LO = 0x0000000000000001ll;
__int64 mm1HI = 0x0000000100000000ll;
__int64 mm1HILO = 0x0000000100000001ll;
__int64 mm0001 = 0x0000000000000001ll;
__int64 mm0010 = 0x0000000000010000ll;
__int64 mm00M0 = 0x00000000FFFF0000ll;
// Each asm_force_*() takes the address of one of the variables above and
// stores it in force_syms_to_exist.
// NOTE(review): judging by the names, this exists only so the compiler sees a
// use of every symbol and keeps it in the object file - confirm nothing calls
// these functions for any other purpose.
static void *force_syms_to_exist = NULL;
void asm_force_mm1LO() { force_syms_to_exist = &mm1LO; }
void asm_force_mm1HI() { force_syms_to_exist = &mm1HI; }
void asm_force_mm1HILO() { force_syms_to_exist = &mm1HILO; }
void asm_force_mm0001() { force_syms_to_exist = &mm0001; }
void asm_force_mm0010() { force_syms_to_exist = &mm0010; }
void asm_force_mm00M0() { force_syms_to_exist = &mm00M0; }
void asm_force_mmBaseWidthShift() { force_syms_to_exist = &mmBaseWidthShift; }
void asm_force_mmBaseWidth() { force_syms_to_exist = &mmBaseWidth; }
void asm_force_mmBaseWidthMask() { force_syms_to_exist = &mmBaseWidthMask; }
void asm_force_mmBaseHeightMask() { force_syms_to_exist = &mmBaseHeightMask; }
void asm_force_mmBaseMasks() { force_syms_to_exist = &mmBaseMasks; }
void asm_force_mmShift() { force_syms_to_exist = &mmShift; }
#else
// Non-GNU compilers: the same constants can stay const.
const __int64 mm1LO = 0x0000000000000001;
const __int64 mm1HI = 0x0000000100000000;
const __int64 mm1HILO = 0x0000000100000001;
const __int64 mm0001 = 0x0000000000000001;
const __int64 mm0010 = 0x0000000000010000;
const __int64 mm00M0 = 0x00000000FFFF0000;
#endif
// speed table
// NOTE(review): 256-entry lookup table; judging by the name it maps a byte to
// (value mod 3) - 1, and bTableSet records whether it has been filled.
// Neither is used in the part of the file visible here - confirm.
SBYTE asbMod3Sub1Table[256];
static BOOL bTableSet = FALSE;
// effect texture being animated/rendered and the base texture it displaces
static CTextureData *_ptdEffect, *_ptdBase;
// AND-mask applied to every pixel offset computed by the PutPixel* helpers
static ULONG _ulBufferMask;
// mip level of the base texture sampled by RenderWater()
static INDEX _iWantedMipLevel;
// current draw buffers: bytes for fire effects, signed words for water effects
static UBYTE *_pubDrawBuffer;
static SWORD *_pswDrawBuffer;
// texture dimensions (not referenced in the part of the file visible here)
PIX _pixTexWidth, _pixTexHeight;
// dimensions of the effect buffer; the sources treat them as powers of two
// (they are used as "& (size-1)" wrap masks)
PIX _pixBufferWidth, _pixBufferHeight;
// randomizer
// State of the multiplicative generator used by all effect sources.
ULONG ulRNDSeed;

// Seed the generator. A zero seed would keep the sequence at zero forever,
// so it is replaced by a fixed non-zero constant.
inline void Randomize( ULONG ulSeed)
{
  if( ulSeed==0) {
    ulSeed = 0x87654321;
  }
  ulRNDSeed = ulSeed;
  ulRNDSeed *= 262147;
}

// Advance the generator by one step and return the new 32-bit state.
inline ULONG Rnd(void)
{
  ulRNDSeed *= 262147;
  return ulRNDSeed;
}

// upper 16 bits of the next random number
#define RNDW (Rnd()>>16)
// Initialize the texture effect source.
// Stores the owning global effect and the source type, then calls the
// initializer registered for that source type with the two given points.
void CTextureEffectSource::Initialize( class CTextureEffectGlobal *ptegGlobalEffect,
                                       ULONG ulEffectSourceType, PIX pixU0, PIX pixV0,
                                       PIX pixU1, PIX pixV1)
{
  // remember global effect for cross linking
  tes_ptegGlobalEffect   = ptegGlobalEffect;
  tes_ulEffectSourceType = ulEffectSourceType;

  // pick this source type's entry from the preset table of the effect class
  struct TextureEffectSourceType &testSource =
    _ategtTextureEffectGlobalPresets[ ptegGlobalEffect->teg_ulEffectType]
      .tet_atestEffectSourceTypes[ ulEffectSourceType];

  // init for animating
  testSource.test_Initialize( this, pixU0, pixV0, pixU1, pixV1);
}
// Animate the texture effect source.
void CTextureEffectSource::Animate(void)
{
// obtain effect source table for current effect class
struct TextureEffectSourceType *patestSourceEffectTypes =
_ategtTextureEffectGlobalPresets[ tes_ptegGlobalEffect->teg_ulEffectType]
.tet_atestEffectSourceTypes;
// animating it
patestSourceEffectTypes[tes_ulEffectSourceType].test_Animate(this);
}
// ----------------------------------------
// SLONG WATER
// ----------------------------------------
// Add iHeight to the water height cell at (pixU,pixV). The linear offset is
// ANDed with _ulBufferMask, so out-of-range coordinates wrap into the buffer.
inline void PutPixelSLONG_WATER( PIX pixU, PIX pixV, INDEX iHeight)
{
  SWORD &swCell = _pswDrawBuffer[(pixV*_pixBufferWidth+pixU)&_ulBufferMask];
  swCell += iHeight;
}
// Add a 3x3 "bump" to the water height map, centred on (pixU,pixV).
// The centre gets iHeightMid; edge and corner neighbours get fixed fractions
// of it (16.16 fixed-point factors 28053/65536 and 12008/65536).
inline void PutPixel9SLONG_WATER( PIX pixU, PIX pixV, INDEX iHeightMid)
{
  const INDEX iEdge   = (iHeightMid*28053) >>16; // iHeight /0.851120 *0.364326;
  const INDEX iCorner = (iHeightMid*12008) >>16; // iHeight /0.851120 *0.155951;

  // walk the rows top to bottom; the middle row is the only one that holds the centre
  for( PIX pixDV=-1; pixDV<=1; pixDV++) {
    const INDEX iOuter = (pixDV==0) ? iEdge      : iCorner;
    const INDEX iInner = (pixDV==0) ? iHeightMid : iEdge;
    PutPixelSLONG_WATER( pixU-1, pixV+pixDV, iOuter);
    PutPixelSLONG_WATER( pixU,   pixV+pixDV, iInner);
    PutPixelSLONG_WATER( pixU+1, pixV+pixDV, iOuter);
  }
}
// ----------------------------------------
// UBYTE FIRE
// ----------------------------------------
// Add iHeight to the fire cell at (pixU,pixV), saturating the result to
// 0..255. The linear offset is ANDed with _ulBufferMask, so coordinates wrap.
inline void PutPixelUBYTE_FIRE( PIX pixU, PIX pixV, INDEX iHeight)
{
  const PIX pixCell = (pixV*_pixBufferWidth+pixU) & _ulBufferMask;
  UBYTE &ubCell = _pubDrawBuffer[pixCell];
  ubCell = Clamp( ubCell +iHeight, 0L, 255L);
}
// Add a 3x3 blob of heat to the fire buffer, centred on (pixU,pixV).
// The centre gets iHeightMid; edge and corner neighbours get fixed fractions
// of it (16.16 fixed-point factors 28053/65536 and 12008/65536).
inline void PutPixel9UBYTE_FIRE( PIX pixU, PIX pixV, INDEX iHeightMid)
{
  const INDEX iEdge   = (iHeightMid*28053) >>16; // iHeight /0.851120 *0.364326;
  const INDEX iCorner = (iHeightMid*12008) >>16; // iHeight /0.851120 *0.155951;

  // walk the rows top to bottom; the middle row is the only one that holds the centre
  for( PIX pixDV=-1; pixDV<=1; pixDV++) {
    const INDEX iOuter = (pixDV==0) ? iEdge      : iCorner;
    const INDEX iInner = (pixDV==0) ? iHeightMid : iEdge;
    PutPixelUBYTE_FIRE( pixU-1, pixV+pixDV, iOuter);
    PutPixelUBYTE_FIRE( pixU,   pixV+pixDV, iInner);
    PutPixelUBYTE_FIRE( pixU+1, pixV+pixDV, iOuter);
  }
}
// Add a 5x5 blob of heat to the fire buffer, centred on (pixU,pixV).
// Three weights are used: the full iHeightMid for the centre "plus" shape,
// the side fraction for the rest, and the diagonal fraction for the corners.
inline void PutPixel25UBYTE_FIRE( PIX pixU, PIX pixV, INDEX iHeightMid)
{
  // weights, indexed by the selectors stored in the kernel table below
  INDEX aiWeight[3];
  aiWeight[0] = iHeightMid;
  aiWeight[1] = (iHeightMid*28053) >>16; // iHeight /0.851120 *0.364326;
  aiWeight[2] = (iHeightMid*12008) >>16; // iHeight /0.851120 *0.155951;

  // weight selector per kernel cell: 0 = mid, 1 = side, 2 = diagonal
  static const UBYTE aubKernel[5][5] = {
    { 2, 1, 1, 1, 2 },
    { 1, 1, 0, 1, 1 },
    { 1, 0, 0, 0, 1 },
    { 1, 1, 0, 1, 1 },
    { 2, 1, 1, 1, 2 },
  };

  // All weights share the sign of iHeightMid, so the saturating adds leave
  // the same buffer contents whatever order the cells are visited in.
  for( PIX pixDV=-2; pixDV<=2; pixDV++) {
    for( PIX pixDU=-2; pixDU<=2; pixDU++) {
      PutPixelUBYTE_FIRE( pixU+pixDU, pixV+pixDV, aiWeight[ aubKernel[pixDV+2][pixDU+2] ]);
    }
  }
}
/////////////////////////////////////////////////////////////////////
// WATER EFFECTS
/////////////////////////////////////////////////////////////////////
// WARNING: Changing this value will BREAK the inline asm on
// GNU-based platforms (Linux, etc.) YOU HAVE BEEN WARNED.
#define DISTORTION 3 //3
///////////////// random surfer
// State of a wandering water source; lives in the source's tesp_achDummy bytes.
struct Surfer {
  FLOAT fU;      // current horizontal position
  FLOAT fV;      // current vertical position
  FLOAT fAngle;  // heading passed to sin()/cos() when the surfer moves
};
// Place the surfer at (pixU0,pixV0) and give it a random integer heading 0..7.
// pixU1/pixV1 are not used by this source type.
void InitializeRandomSurfer(CTextureEffectSource *ptes,
                            PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  Surfer *psf = (Surfer *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  psf->fU     = pixU0;
  psf->fV     = pixV0;
  psf->fAngle = RNDW&7;
}
// Advance the surfer one frame: splash at the old position, move two units
// along the heading, splash harder at the new position, then randomly turn.
void AnimateRandomSurfer(CTextureEffectSource *ptes)
{
  Surfer *psf = (Surfer *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // weaker splash where the surfer was
  PutPixel9SLONG_WATER((long) psf->fU, (long) psf->fV, 125);

  // step along the heading
  psf->fU += 2*sin(psf->fAngle);
  psf->fV += 2*cos(psf->fAngle);

  // stronger splash where the surfer is now
  PutPixel9SLONG_WATER((long) psf->fU, (long) psf->fV, 250);

  // two independent 1-in-16 chances to change the heading
  if( (RNDW&15)==0) { psf->fAngle += 3.14f/7.0f; }
  if( (RNDW&15)==0) { psf->fAngle -= 3.14f/5.0f; }
}
///////////////// raindrops
// One raindrop; five of them are packed back to back in tesp_achDummy.
struct Raindrop {
  UBYTE pixU;     // buffer column of the drop
  UBYTE pixV;     // buffer row of the drop
  SWORD iHeight;  // amplitude of the splash
  SWORD iIndex;   // frame counter within the drop's cycle (see AnimateRaindrops)
};
// Scatter the five drops at random positions with random amplitudes.
// iHeight is used as a bit mask on the random amplitude. The drops start at
// cycle frames 0, 8, 16, 24 and 32 so that they do not all hit at once.
// The pixU0..pixV1 arguments are not used.
void InitializeRaindrops(CTextureEffectSource *ptes,
                         PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1, int iHeight)
{
  int iDrop = 0;
  while( iDrop<5) {
    Raindrop &rdDrop =
      ((Raindrop&) ptes->tes_tespEffectSourceProperties.tesp_achDummy[iDrop*sizeof(Raindrop)]);
    rdDrop.pixU    = RNDW&(_pixBufferWidth -1);
    rdDrop.pixV    = RNDW&(_pixBufferHeight-1);
    rdDrop.iHeight = RNDW&iHeight;
    rdDrop.iIndex  = iDrop*8;
    iDrop++;
  }
}
// Raindrop initializer with amplitude mask 255 (amplitudes 0..255).
void InitializeRaindropsStandard(CTextureEffectSource *ptes,
PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) {
InitializeRaindrops(ptes, pixU0, pixV0, pixU1, pixV1, 255);
}
// Raindrop initializer with amplitude mask 1023 (amplitudes 0..1023).
void InitializeRaindropsBig(CTextureEffectSource *ptes,
PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) {
InitializeRaindrops(ptes, pixU0, pixV0, pixU1, pixV1, 1023);
}
// Raindrop initializer with amplitude mask 31 (amplitudes 0..31).
void InitializeRaindropsSmall(CTextureEffectSource *ptes,
PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) {
InitializeRaindrops(ptes, pixU0, pixV0, pixU1, pixV1, 31);
}
// Animate the five raindrops stored in the source's property bytes.
// Each drop runs a 48-frame cycle: during the first frames it pushes a
// sine-shaped pulse (scaled by its amplitude) into the water height map, then
// it stays idle until the cycle ends and it is respawned at a random position.
// iHeight is the bit mask applied to the random amplitude of respawned drops.
void AnimateRaindrops(CTextureEffectSource *ptes, int iHeight)
{
  for (int iIndex=0; iIndex<5; iIndex++) {
    Raindrop &rd =
      ((Raindrop&) ptes->tes_tespEffectSourceProperties.tesp_achDummy[iIndex*sizeof(Raindrop)]);
    if (rd.iIndex < 48) {
      rd.iIndex++;
      if (rd.iIndex < 8) {
        // Scale the sine by the amplitude BEFORE truncating to an integer.
        // A cast binds tighter than '*', so "(long) sin(x)*h" truncated the
        // sine alone (to 0 for all phases used here) and the drop was invisible.
        PutPixel9SLONG_WATER(rd.pixU, rd.pixV,
                             (long) (sin(rd.iIndex/4.0f*(-3.14f))*rd.iHeight));
      }
    } else {
      // cycle finished: respawn somewhere else with a new amplitude
      rd.pixU = RNDW&(_pixBufferWidth -1);
      rd.pixV = RNDW&(_pixBufferHeight-1);
      rd.iHeight = RNDW&iHeight;
      rd.iIndex = 0;
    }
  }
}
// Raindrop animation; respawned drops get amplitudes masked with 255.
void AnimateRaindropsStandard(CTextureEffectSource *ptes) {
AnimateRaindrops(ptes, 255);
}
// Raindrop animation; respawned drops get amplitudes masked with 1023.
void AnimateRaindropsBig(CTextureEffectSource *ptes) {
AnimateRaindrops(ptes, 1023);
}
// Raindrop animation; respawned drops get amplitudes masked with 31.
void AnimateRaindropsSmall(CTextureEffectSource *ptes) {
AnimateRaindrops(ptes, 31);
}
///////////////// oscilator
// State of a fixed oscillating water source; lives in tesp_achDummy.
struct Oscilator {
  UBYTE pixU;    // buffer column of the source
  UBYTE pixV;    // buffer row of the source
  FLOAT fAngle;  // current phase, advanced by AnimateOscilator()
};
// Put the oscillator at (pixU0,pixV0) with its phase at -3.14.
// pixU1/pixV1 are not used by this source type.
void InitializeOscilator(CTextureEffectSource *ptes,
                         PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  Oscilator *pos = (Oscilator *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pos->pixU   = pixU0;
  pos->pixV   = pixV0;
  pos->fAngle = -3.14f;
}
// Animate the oscillator: push a sine-driven height (amplitude 150) into the
// water map at the oscillator's position and advance its phase by 3.14/6.
void AnimateOscilator(CTextureEffectSource *ptes)
{
  Oscilator &os =
    (*((Oscilator *) ptes->tes_tespEffectSourceProperties.tesp_achDummy));
  // Scale the sine BEFORE truncating to an integer. A cast binds tighter than
  // '*', so "(long) sin(a)*150" truncated the sine alone and produced 0 for
  // practically every phase.
  PutPixel9SLONG_WATER(os.pixU, os.pixV, (long) (sin(os.fAngle)*150));
  os.fAngle += (3.14f/6);
}
///////////////// Vertical Line
// State of a vertical oscillating line; lives in tesp_achDummy.
struct VertLine{
  UBYTE pixU;    // buffer column of the line
  UBYTE pixV;    // buffer row where the line starts
  UWORD uwSize;  // number of pixels in the line
  FLOAT fAngle;  // current phase, advanced by AnimateVertLine()
};
// Start the line at (pixU0,pixV0). Its length is |pixV1-pixV0|, or 16 pixels
// when both points lie on the same row. pixU1 is not used.
void InitializeVertLine(CTextureEffectSource *ptes,
                        PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  VertLine *pvl = (VertLine *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pvl->pixU   = pixU0;
  pvl->pixV   = pixV0;
  pvl->fAngle = -3.14f;
  pvl->uwSize = (pixV0!=pixV1) ? abs(pixV1-pixV0) : 16;
}
// Animate the vertical line: add the same sine-driven height (amplitude 25)
// to every pixel of the line, wrapping at the bottom of the buffer, and
// advance the phase by 3.14/6.
void AnimateVertLine(CTextureEffectSource *ptes)
{
  VertLine *pvl = (VertLine *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // the phase does not change inside the loop, so one value serves all pixels
  const INDEX iPush = (long) (sin(pvl->fAngle)*25);

  PIX pixRow = pvl->pixV;
  for (UWORD uwLeft=pvl->uwSize; uwLeft>0; uwLeft--) {
    PutPixelSLONG_WATER(pvl->pixU, pixRow, iPush);
    pixRow = (pixRow+1)&(_pixBufferHeight-1);
  }
  pvl->fAngle += (3.14f/6);
}
///////////////// Horizontal Line
// State of a horizontal oscillating line; lives in tesp_achDummy.
struct HortLine{
  UBYTE pixU;    // buffer column where the line starts
  UBYTE pixV;    // buffer row of the line
  UWORD uwSize;  // number of pixels in the line
  FLOAT fAngle;  // current phase, advanced by AnimateHortLine()
};
// Start the line at (pixU0,pixV0). Its length is |pixU1-pixU0|, or 16 pixels
// when both points lie in the same column. pixV1 is not used.
void InitializeHortLine(CTextureEffectSource *ptes,
                        PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  HortLine *phl = (HortLine *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  phl->pixU   = pixU0;
  phl->pixV   = pixV0;
  phl->fAngle = -3.14f;
  phl->uwSize = (pixU0!=pixU1) ? abs(pixU1-pixU0) : 16;
}
// Animate the horizontal line: add the same sine-driven height (amplitude 25)
// to every pixel of the line, wrapping at the right edge of the buffer, and
// advance the phase by 3.14/6.
void AnimateHortLine(CTextureEffectSource *ptes)
{
  HortLine *phl = (HortLine *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // the phase does not change inside the loop, so one value serves all pixels
  const INDEX iPush = (long) (sin(phl->fAngle)*25);

  PIX pixColumn = phl->pixU;
  for (UWORD uwLeft=phl->uwSize; uwLeft>0; uwLeft--) {
    PutPixelSLONG_WATER(pixColumn, phl->pixV, iPush);
    pixColumn = (pixColumn+1)&(_pixBufferWidth-1);
  }
  phl->fAngle += (3.14f/6);
}
/////////////////////////////////////////////////////////////////////
// FIRE EFFECTS
/////////////////////////////////////////////////////////////////////
///////////////// Fire Point
// Position of a point fire source; lives in tesp_achDummy.
struct FirePoint{
  UBYTE pixU;  // buffer column
  UBYTE pixV;  // buffer row
};
// Remember (pixU0,pixV0) as the position of the fire point.
// pixU1/pixV1 are not used by this source type.
void InitializeFirePoint(CTextureEffectSource *ptes,
                         PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  FirePoint *pfp = (FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pfp->pixU = pixU0;
  pfp->pixV = pixV0;
}
// Feed the fire point: add a full-strength (255) 3x3 blob at its position.
void AnimateFirePoint(CTextureEffectSource *ptes)
{
  const FirePoint *pfp =
    (const FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  PutPixel9UBYTE_FIRE(pfp->pixU, pfp->pixV, 255);
}
// A random fire point keeps exactly the same state as a plain fire point
// (just its position), so the plain initializer does all the work.
void InitializeRandomFirePoint(CTextureEffectSource *ptes,
                               PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  InitializeFirePoint(ptes, pixU0, pixV0, pixU1, pixV1);
}
// Feed the fire point with a 3x3 blob of random strength (0..255).
void AnimateRandomFirePoint(CTextureEffectSource *ptes)
{
  const FirePoint *pfp =
    (const FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  const INDEX iHeat = RNDW&255;
  PutPixel9UBYTE_FIRE(pfp->pixU, pfp->pixV, iHeat);
}
// A shaking fire point keeps exactly the same state as a plain fire point
// (just its position), so the plain initializer does all the work.
void InitializeFireShakePoint(CTextureEffectSource *ptes,
                              PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  InitializeFirePoint(ptes, pixU0, pixV0, pixU1, pixV1);
}
// Feed the fire point with a full-strength 3x3 blob whose centre is jittered
// by a random offset on each axis.
void AnimateFireShakePoint(CTextureEffectSource *ptes)
{
  const FirePoint *pfp =
    (const FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // NOTE(review): the offsets are meant to be -1, 0 or +1, but they are held
  // in UBYTE, so -1 is stored as 255. Whether +255 lands on the same cell as
  // -1 depends on the buffer size and _ulBufferMask. Behaviour is kept as is;
  // switching to a signed type would change the rendered effect - confirm
  // before fixing.
  const UBYTE ubShakeU = RNDW%3 - 1;
  const UBYTE ubShakeV = RNDW%3 - 1;
  PutPixel9UBYTE_FIRE(pfp->pixU+ubShakeU, pfp->pixV+ubShakeV, 255);
}
///////////////// Fire Place
#define FIREPLACE_SIZE 60
// State of a fire place (a horizontal row of flames); lives in tesp_achDummy.
struct FirePlace{
  UBYTE pixU;                     // buffer column of the leftmost flame
  UBYTE pixV;                     // buffer row of the fire place
  UBYTE ubWidth;                  // number of flames in use (10..FIREPLACE_SIZE)
  UBYTE aubFire[FIREPLACE_SIZE];  // current strength of each flame
};
// Set up a fire place starting at (pixU0,pixV0), |pixU1-pixU0| flames wide.
// The width is limited to the range 10..FIREPLACE_SIZE. pixV1 is not used.
void InitializeFirePlace(CTextureEffectSource *ptes,
                         PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  FirePlace &fp =
    (*((FirePlace *) ptes->tes_tespEffectSourceProperties.tesp_achDummy));
  fp.pixU = pixU0;
  fp.pixV = pixV0;
  // Clamp in full integer precision and narrow afterwards. Storing the raw
  // distance into the UBYTE first wrapped distances above 255 (e.g. 300
  // became 44) before the limits were applied.
  INDEX iWidth = abs(pixU1-pixU0);
  if (iWidth>FIREPLACE_SIZE) iWidth = FIREPLACE_SIZE;
  if (iWidth<10) iWidth = 10;
  fp.ubWidth = (UBYTE) iWidth;
  // clear fire array
  for (int iCnt=0; iCnt<fp.ubWidth; iCnt++) {
    fp.aubFire[iCnt] = 0;
  }
}
// Animate the fire place for one frame:
//  1. randomly throw a "match" (five cells set to 255) or "water" (ten
//     random cells set to 0) onto the row,
//  2. refuel every cell that has faded below 50,
//  3. damp the cells near both ends,
//  4. smooth the row with a 3-tap average,
//  5. draw every cell into the fire buffer as a 3x3 blob.
void AnimateFirePlace(CTextureEffectSource *ptes)
{
  FirePlace *pfp = (FirePlace *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  UBYTE *aubFire = pfp->aubFire;
  const UBYTE ubWidth = pfp->ubWidth;
  INDEX iCell;

  // one random byte decides what happens this frame
  const ULONG ulEvent = RNDW&255;
  if (ulEvent>200) {
    // match
    const ULONG ulFirst = ulEvent%(ubWidth-5);
    for (iCell=0; iCell<5; iCell++) {
      aubFire[ulFirst+iCell] = 255;
    }
  } else if (ulEvent<50) {
    // water
    for (iCell=0; iCell<10; iCell++) {
      aubFire[RNDW%ubWidth] = 0;
    }
  }

  // refuel fading flames; flames at 50 or above are left alone
  for (iCell=0; iCell<ubWidth; iCell++) {
    UBYTE &ubFlame = aubFire[iCell];
    if (ubFlame >= 50) {
      continue;
    }
    if (ubFlame > 10) {
      // starting to burn
      ubFlame += RNDW%30;
    } else {
      // give more fire
      ubFlame += RNDW%30+30;
    }
  }

  // water on edges: four times, clear a random cell among the four outermost
  // on the left and its mirror on the right
  for (iCell=0; iCell<4; iCell++) {
    const INDEX iFromEdge = RNDW%4;
    aubFire[iFromEdge] = 0;
    aubFire[ubWidth-1-iFromEdge] = 0;
  }

  // smooth in place, left to right (the left neighbour is already smoothed)
  for (iCell=1; iCell<(ubWidth-1); iCell++) {
    aubFire[iCell] = (aubFire[iCell-1]+aubFire[iCell]+aubFire[iCell+1])/3;
  }

  // draw fire place in buffer
  for (iCell=0; iCell<ubWidth; iCell++) {
    PutPixel9UBYTE_FIRE(pfp->pixU+iCell, pfp->pixV, aubFire[iCell]);
  }
}
///////////////// Fire Roler
// State of a fire source circling on an ellipse; lives in tesp_achDummy.
struct FireRoler{
  UBYTE pixU;       // buffer column of the ellipse centre
  UBYTE pixV;       // buffer row of the ellipse centre
  FLOAT fRadiusU;   // horizontal radius (signed)
  FLOAT fRadiusV;   // vertical radius (signed)
  FLOAT fAngle;     // current angle on the ellipse
  FLOAT fAngleAdd;  // angle step between two drawn blobs
};
// Centre the roller at (pixU0,pixV0). The second point gives the radii as
// (pixU1-pixU0, pixV1-pixV0); if it equals the first point, a radius of 3 in
// both directions and an angle step of 3.14/6 are used instead.
void InitializeFireRoler(CTextureEffectSource *ptes,
                         PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  FireRoler *pfr = (FireRoler *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pfr->pixU   = pixU0;
  pfr->pixV   = pixV0;
  pfr->fAngle = 0;

  const BOOL bSinglePoint = (pixU0==pixU1 && pixV0==pixV1);
  if (bSinglePoint) {
    // defaults
    pfr->fRadiusU  = 3;
    pfr->fRadiusV  = 3;
    pfr->fAngleAdd = (3.14f/6);
    return;
  }
  pfr->fRadiusU = pixU1-pixU0;
  pfr->fRadiusV = pixV1-pixV0;
  // larger ellipses take smaller angular steps
  pfr->fAngleAdd = (3.14f/(Abs(pfr->fRadiusU)+Abs(pfr->fRadiusV)));
}
// Animate the roller: draw three 3x3 blobs of decreasing strength (255, 200,
// 150) at three consecutive angle steps along the ellipse.
void AnimateFireRoler(CTextureEffectSource *ptes)
{
  FireRoler *pfr = (FireRoler *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  static const INDEX aiHeat[3] = { 255, 200, 150 };
  for (INDEX iStep=0; iStep<3; iStep++) {
    PutPixel9UBYTE_FIRE((long) (cos(pfr->fAngle)*pfr->fRadiusU + pfr->pixU),
                        (long) (sin(pfr->fAngle)*pfr->fRadiusV + pfr->pixV), aiHeat[iStep]);
    pfr->fAngle += pfr->fAngleAdd;
  }
}
///////////////// Fire Fall
#define FIREFALL_POINTS 100
// State of a "fire fall" (falling sparks); lives in tesp_achDummy.
struct FireFall{
  UBYTE pixU;                   // buffer column of the left edge
  UBYTE pixV;                   // buffer row of the top edge
  ULONG ulWidth;                // width of the column band the sparks fall in
  ULONG ulPointToReinitialize;  // index of the next spark allowed to respawn
};
// One falling spark, stored in the source's tes_atepPixels array.
struct FireFallPixel{
  UBYTE pixU;     // buffer column
  UBYTE pixV;     // buffer row
  UBYTE ubSpeed;  // rows fallen per frame (2 or 3)
};
// Set up the fall at (pixU0,pixV0), |pixU1-pixU0| wide (15 if both points
// share a column), and scatter FIREFALL_POINTS sparks over it. pixV1 is unused.
void InitializeFireFall(CTextureEffectSource *ptes,
                        PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  FireFall *pff = (FireFall *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pff->pixU    = pixU0;
  pff->pixV    = pixV0;
  pff->ulWidth = (pixU0!=pixU1) ? abs(pixU1-pixU0) : 15;

  // initialize fall points
  ptes->tes_atepPixels.New(FIREFALL_POINTS);
  pff->ulPointToReinitialize = 0;
  for (INDEX iSpark=0; iSpark<FIREFALL_POINTS; iSpark++) {
    FireFallPixel &ffpSpark = ((FireFallPixel&) ptes->tes_atepPixels[iSpark]);
    // random column inside the band, random row, random speed of 2 or 3
    ffpSpark.pixU    = pff->pixU+(RNDW%pff->ulWidth);
    ffpSpark.pixV    = pff->pixV+(RNDW%_pixBufferHeight);
    ffpSpark.ubSpeed = (RNDW&1)+2;
  }
}
// Animate the fire fall: draw every spark (plus a weaker pixel one row below
// it), move it down by its speed, and wrap it around when it leaves the
// bottom of the buffer. On wrap-around only the spark whose index matches
// ulPointToReinitialize also gets a new random column and speed.
void AnimateFireFall(CTextureEffectSource *ptes)
{
  FireFall *pff = (FireFall *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // animate fall points
  for (INDEX iSpark=0; iSpark<FIREFALL_POINTS; iSpark++) {
    FireFallPixel &ffpSpark = ((FireFallPixel&) ptes->tes_atepPixels[iSpark]);

    // random brightness: 40, 104, 168 or 232
    const int iHeat = (RNDW&3)*64 + 40;
    if (ffpSpark.ubSpeed == 2) {
      // slow sparks wobble sideways by -1..+1 columns
      PutPixelUBYTE_FIRE(ffpSpark.pixU+(RNDW%3)-1, ffpSpark.pixV, iHeat);
      PutPixelUBYTE_FIRE(ffpSpark.pixU+(RNDW%3)-1, ffpSpark.pixV+1, iHeat-40);
    } else {
      PutPixelUBYTE_FIRE(ffpSpark.pixU, ffpSpark.pixV, iHeat);
      PutPixelUBYTE_FIRE(ffpSpark.pixU, ffpSpark.pixV+1, iHeat-40);
    }

    // fall
    ffpSpark.pixV += ffpSpark.ubSpeed;
    if (ffpSpark.pixV < _pixBufferHeight) {
      continue;
    }

    // fell off the bottom: wrap to the top
    ffpSpark.pixV -= _pixBufferHeight;
    if (pff->ulPointToReinitialize != iSpark) {
      continue;
    }

    // it is this spark's turn to be respawned; hand the turn to the next one
    pff->ulPointToReinitialize++;
    if (pff->ulPointToReinitialize >= FIREFALL_POINTS) pff->ulPointToReinitialize = 0;
    ffpSpark.pixU    = pff->pixU+(RNDW%pff->ulWidth);
    ffpSpark.ubSpeed = (RNDW&1)+2;
  }
}
///////////////// Fire Fountain
#define FIREFOUNTAIN_POINTS 100
// State of a fire fountain; lives in tesp_achDummy.
struct FireFountain{
  UBYTE pixU;            // buffer column of the nozzle
  UBYTE pixV;            // buffer row of the nozzle
  ULONG ulWidth;         // range of the random horizontal launch speed
  ULONG ulBaseHeight;    // minimum vertical launch speed
  ULONG ulRandomHeight;  // range of the random extra vertical launch speed
};
// One fountain particle. Positions and speeds are fixed point with 6
// fractional bits. Every particle is addressed at an even index of
// tes_atepPixels (the array is allocated with two entries per particle).
struct FireFountainPixel{
  SWORD pixU;      // column, fixed point (<<6)
  SWORD pixV;      // row, fixed point (<<6)
  UBYTE pixLastU;  // column drawn in the previous frame
  UBYTE pixLastV;  // row drawn in the previous frame
  SWORD sbSpeedU;  // horizontal speed per frame, fixed point
  SWORD sbSpeedV;  // upward speed per frame, fixed point
};
// Set up the fountain with its nozzle at (pixU0,pixV0). The second point
// scales the spray: width 2*|dU| (31 if dU==0); launch speed 3*|dV| plus a
// random part below |dV| (120 and 40 if dV==0). All particles start at the
// nozzle column, at a random row in the lower 7/8 of the buffer, at rest.
void InitializeFireFountain(CTextureEffectSource *ptes,
                            PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  FireFountain *pff = (FireFountain *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pff->pixU = pixU0;
  pff->pixV = pixV0;

  // fountain width
  pff->ulWidth = (pixU0!=pixU1) ? abs(pixU1-pixU0)*2 : 31;

  // fountain height
  if (pixV0!=pixV1) {
    pff->ulBaseHeight   = abs(pixV1-pixV0)*3;
    pff->ulRandomHeight = abs(pixV1-pixV0);
  } else {
    pff->ulBaseHeight   = 120;
    pff->ulRandomHeight = 40;
  }

  // initialize fountain points
  ptes->tes_atepPixels.New(FIREFOUNTAIN_POINTS*2);
  for (INDEX iSlot=0; iSlot<FIREFOUNTAIN_POINTS*2; iSlot+=2) {
    FireFountainPixel &ffpDrop = ((FireFountainPixel&) ptes->tes_atepPixels[iSlot]);
    ffpDrop.pixU     = (pff->pixU)<<6;
    ffpDrop.pixV     = (RNDW%(_pixBufferHeight-(_pixBufferHeight>>3))+(_pixBufferHeight>>3))<<6;
    ffpDrop.pixLastU = (ffpDrop.pixU)>>6;
    ffpDrop.pixLastV = (ffpDrop.pixV)>>6;
    ffpDrop.sbSpeedU = 0;
    ffpDrop.sbSpeedV = 0;
  }
}
// Animate the fountain: draw each particle at its current (200) and previous
// (150) position, move it, apply gravity, and relaunch it from the nozzle
// with a random velocity once it gets within 5 rows of the buffer bottom.
void AnimateFireFountain(CTextureEffectSource *ptes)
{
  FireFountain *pff = (FireFountain *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // animate fountain points
  for (INDEX iSlot=0; iSlot<FIREFOUNTAIN_POINTS*2; iSlot+=2) {
    FireFountainPixel &ffpDrop = ((FireFountainPixel&) ptes->tes_atepPixels[iSlot]);

    // head and tail of the particle
    PutPixelUBYTE_FIRE((ffpDrop.pixU)>>6, (ffpDrop.pixV)>>6, 200);
    PutPixelUBYTE_FIRE(ffpDrop.pixLastU, ffpDrop.pixLastV, 150);

    // move pixel; the upward speed drops by 8 every frame
    ffpDrop.pixLastU = (ffpDrop.pixU)>>6;
    ffpDrop.pixLastV = (ffpDrop.pixV)>>6;
    ffpDrop.pixU += ffpDrop.sbSpeedU;
    ffpDrop.pixV -= ffpDrop.sbSpeedV;
    ffpDrop.sbSpeedV -= 8;

    // still in the air?
    if ((ffpDrop.pixV>>6) < (_pixBufferHeight-5)) {
      continue;
    }

    // fallen down: relaunch from the nozzle
    ffpDrop.pixU     = (pff->pixU)<<6;
    ffpDrop.pixV     = (pff->pixV)<<6;
    ffpDrop.pixLastU = (ffpDrop.pixU)>>6;
    ffpDrop.pixLastV = (ffpDrop.pixV)>>6;
    ffpDrop.sbSpeedU = (RNDW%pff->ulWidth)-(pff->ulWidth/2-1);
    ffpDrop.sbSpeedV = (RNDW%pff->ulRandomHeight)+pff->ulBaseHeight;
  }
}
///////////////// Fire Side Fountain
#define FIRESIDEFOUNTAIN_POINTS 100
// State of a sideways-spraying fire fountain; lives in tesp_achDummy.
struct FireSideFountain{
  UBYTE pixU;           // buffer column of the nozzle
  UBYTE pixV;           // buffer row of the nozzle
  ULONG ulBaseWidth;    // minimum horizontal launch speed
  ULONG ulRandomWidth;  // range of the random extra horizontal launch speed
  ULONG ulSide;         // non-zero: spray towards lower columns
};
// One fountain particle. Positions and speeds are fixed point with 6
// fractional bits. Every particle is addressed at an even index of
// tes_atepPixels (the array is allocated with two entries per particle).
struct FireSideFountainPixel{
  SWORD pixU;      // column, fixed point (<<6)
  SWORD pixV;      // row, fixed point (<<6)
  UBYTE pixLastU;  // column drawn in the previous frame
  UBYTE pixLastV;  // row drawn in the previous frame
  SWORD sbSpeedU;  // horizontal speed per frame, fixed point
  SWORD sbSpeedV;  // upward speed per frame, fixed point
};
// Set up the fountain with its nozzle at (pixU0,pixV0). With a second point
// in another column, the launch speed is 2*|dU| plus a random part below |dU|
// and the spray goes towards the second point. Otherwise speed 80 + random
// below 40 is used and the spray goes away from the nearer side of the
// buffer. All particles start at the nozzle column, at a random row in the
// lower 7/8 of the buffer, at rest. pixV1 is not used.
void InitializeFireSideFountain(CTextureEffectSource *ptes,
                                PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  FireSideFountain *pfsf =
    (FireSideFountain *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pfsf->pixU = pixU0;
  pfsf->pixV = pixV0;

  // fountain width
  if (pixU0!=pixU1) {
    pfsf->ulBaseWidth   = abs(pixU1-pixU0)*2;
    pfsf->ulRandomWidth = abs(pixU1-pixU0);
    pfsf->ulSide        = (pixU1<pixU0);
  } else {
    pfsf->ulBaseWidth   = 80;
    pfsf->ulRandomWidth = 40;
    pfsf->ulSide        = (pixU0>(_pixBufferWidth/2));
  }

  // initialize fountain points
  ptes->tes_atepPixels.New(FIRESIDEFOUNTAIN_POINTS*2);
  for (INDEX iSlot=0; iSlot<FIRESIDEFOUNTAIN_POINTS*2; iSlot+=2) {
    FireSideFountainPixel &fsfpDrop = ((FireSideFountainPixel&) ptes->tes_atepPixels[iSlot]);
    fsfpDrop.pixU     = (pfsf->pixU)<<6;
    fsfpDrop.pixV     = (RNDW%(_pixBufferHeight-(_pixBufferHeight>>3))+(_pixBufferHeight>>3))<<6;
    fsfpDrop.pixLastU = (fsfpDrop.pixU)>>6;
    fsfpDrop.pixLastV = (fsfpDrop.pixV)>>6;
    fsfpDrop.sbSpeedU = 0;
    fsfpDrop.sbSpeedV = 0;
  }
}
// Animate the side fountain: draw each particle at its current (200) and
// previous (150) position, move it, apply gravity, and relaunch it
// horizontally from the nozzle once it gets within 5 rows of the buffer bottom.
void AnimateFireSideFountain(CTextureEffectSource *ptes)
{
  FireSideFountain *pfsf =
    (FireSideFountain *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // animate fountain points
  for (INDEX iSlot=0; iSlot<FIRESIDEFOUNTAIN_POINTS*2; iSlot+=2) {
    FireSideFountainPixel &fsfpDrop = ((FireSideFountainPixel&) ptes->tes_atepPixels[iSlot]);

    // head and tail of the particle
    PutPixelUBYTE_FIRE((fsfpDrop.pixU)>>6, (fsfpDrop.pixV)>>6, 200);
    PutPixelUBYTE_FIRE(fsfpDrop.pixLastU, fsfpDrop.pixLastV, 150);

    // move pixel; the upward speed drops by 8 every frame
    fsfpDrop.pixLastU = (fsfpDrop.pixU)>>6;
    fsfpDrop.pixLastV = (fsfpDrop.pixV)>>6;
    fsfpDrop.pixU += fsfpDrop.sbSpeedU;
    fsfpDrop.pixV -= fsfpDrop.sbSpeedV;
    fsfpDrop.sbSpeedV -= 8;

    // still in the air?
    if ((fsfpDrop.pixV>>6) < (_pixBufferHeight-5)) {
      continue;
    }

    // fallen down: relaunch from the nozzle, purely sideways
    fsfpDrop.pixU     = (pfsf->pixU)<<6;
    fsfpDrop.pixV     = (pfsf->pixV)<<6;
    fsfpDrop.pixLastU = (fsfpDrop.pixU)>>6;
    fsfpDrop.pixLastV = (fsfpDrop.pixV)>>6;
    fsfpDrop.sbSpeedU = (RNDW%pfsf->ulRandomWidth)+pfsf->ulBaseWidth;
    if (pfsf->ulSide) {
      fsfpDrop.sbSpeedU = -fsfpDrop.sbSpeedU;
    }
    fsfpDrop.sbSpeedV = 0;
  }
}
///////////////// Fire Lightning
// State of a lightning bolt between two points; lives in tesp_achDummy.
struct FireLightning{
  FLOAT fpixUFrom;  // start point, column
  FLOAT fpixVFrom;  // start point, row
  FLOAT fpixUTo;    // end point, column
  FLOAT fpixVTo;    // end point, row
  FLOAT fvU;        // unit direction from start to end, U component
  FLOAT fvV;        // unit direction from start to end, V component
  FLOAT fvNormalU;  // direction rotated by 90 degrees, U component
  FLOAT fvNormalV;  // direction rotated by 90 degrees, V component
  FLOAT fDistance;  // length of the bolt
  SLONG slCnt;      // frames left until the bolt is drawn again
};
// Set up a bolt from (pixU0,pixV0) to (pixU1,pixV1). If both points are the
// same, the end point is derived from the buffer size instead
// (|width-U0|, |height-V0|).
void InitializeFireLightning(CTextureEffectSource *ptes,
                             PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  FireLightning &fl =
    (*((FireLightning *) ptes->tes_tespEffectSourceProperties.tesp_achDummy));
  fl.fpixUFrom = (FLOAT) pixU0;
  fl.fpixVFrom = (FLOAT) pixV0;
  if (pixU0==pixU1 && pixV0==pixV1) {
    fl.fpixUTo = Abs((FLOAT)_pixBufferWidth -fl.fpixUFrom);
    fl.fpixVTo = Abs((FLOAT)_pixBufferHeight-fl.fpixVFrom);
  } else {
    fl.fpixUTo = (FLOAT) pixU1;
    fl.fpixVTo = (FLOAT) pixV1;
  }
  const FLOAT fDeltaU = fl.fpixUTo-fl.fpixUFrom;
  const FLOAT fDeltaV = fl.fpixVTo-fl.fpixVFrom;
  fl.fDistance = sqrt(fDeltaU*fDeltaU + fDeltaV*fDeltaV);
  // vector
  if (fl.fDistance>0.0f) {
    fl.fvU = fDeltaU/fl.fDistance;
    fl.fvV = fDeltaV/fl.fDistance;
  } else {
    // Start and end coincide (e.g. a single point placed exactly at the
    // buffer centre). Dividing by the zero length would store NaN, so keep
    // a zero direction; a bolt of length zero draws nothing anyway.
    fl.fvU = 0.0f;
    fl.fvV = 0.0f;
  }
  // normal vector
  fl.fvNormalU = -fl.fvV;
  fl.fvNormalV = fl.fvU;
  // frame counter
  fl.slCnt = 2;
}
// Animate the lightning. The frame counter is decremented on every call and
// the bolt is redrawn only when it reaches zero (it is then reset to 2).
// Drawing walks from the start point towards the end point in random steps
// of 5..9 units, pushes every intermediate point sideways by a random -5..+5
// along the normal, and connects consecutive points with full-strength pixels.
void AnimateFireLightning(CTextureEffectSource *ptes)
{
  FireLightning &fl =
    (*((FireLightning *) ptes->tes_tespEffectSourceProperties.tesp_achDummy));

  // not this frame
  fl.slCnt--;
  if (fl.slCnt != 0) {
    return;
  }

  // last point -> starting point
  FLOAT fPrevU = fl.fpixUFrom;
  FLOAT fPrevV = fl.fpixVFrom;

  ULONG ulTravelled = 0;
  while ((FLOAT)ulTravelled<fl.fDistance) {
    FLOAT fNextU, fNextV;
    // go away from source point to destination point
    ulTravelled += (RNDW%5)+5;
    if ((FLOAT)ulTravelled>=fl.fDistance) {
      // reached the end: the last segment goes exactly to the end point
      fNextU = fl.fpixUTo;
      fNextV = fl.fpixVTo;
    } else {
      // point on the straight line ...
      fNextU = fl.fpixUFrom + fl.fvU*(FLOAT)ulTravelled;
      fNextV = fl.fpixVFrom + fl.fvV*(FLOAT)ulTravelled;
      // ... moved sideways along the normal
      const SLONG slSideways = (SLONG) (RNDW%11)-5;
      fNextU += fl.fvNormalU*(FLOAT)slSideways;
      fNextV += fl.fvNormalV*(FLOAT)slSideways;
    }

    // draw the segment: one pixel per unit of the longer axis
    FLOAT fStepU = fNextU-fPrevU;
    FLOAT fStepV = fNextV-fPrevV;
    FLOAT fLeft = (Abs(fStepU)>Abs(fStepV)) ? Abs(fStepU) : Abs(fStepV);
    fStepU = fStepU/fLeft;
    fStepV = fStepV/fLeft;
    for (; fLeft>0.0f; fLeft -= 1) {
      PutPixelUBYTE_FIRE((PIX) fPrevU, (PIX) fPrevV, 255);
      fPrevU += fStepU;
      fPrevV += fStepV;
    }

    // the next segment starts exactly at this point
    fPrevU = fNextU;
    fPrevV = fNextV;
  }
  fl.slCnt = 2;
}
///////////////// Fire Lightning Ball
#define FIREBALL_LIGHTNINGS 2
// State of a lightning ball (bolts shooting out of a centre point); lives in
// tesp_achDummy.
struct FireLightningBall{
  FLOAT fpixU;     // centre, column
  FLOAT fpixV;     // centre, row
  FLOAT fRadiusU;  // horizontal reach of the bolts (signed)
  FLOAT fRadiusV;  // vertical reach of the bolts (signed)
};
// Centre the ball at (pixU0,pixV0). The second point gives the reach as
// (pixU1-pixU0, pixV1-pixV0); if it equals the first point, 20 is used for both.
void InitializeFireLightningBall(CTextureEffectSource *ptes,
                                 PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  FireLightningBall *pflb =
    (FireLightningBall *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pflb->fpixU = (FLOAT) pixU0;
  pflb->fpixV = (FLOAT) pixV0;

  const BOOL bSinglePoint = (pixU0==pixU1 && pixV0==pixV1);
  if (bSinglePoint) {
    pflb->fRadiusU = 20;
    pflb->fRadiusV = 20;
  } else {
    pflb->fRadiusU = pixU1-pixU0;
    pflb->fRadiusV = pixV1-pixV0;
  }
}
// Animate the lightning ball: every frame draw FIREBALL_LIGHTNINGS jagged
// bolts from the centre to a random point on the ellipse given by the radii.
// Each bolt advances in random steps of 5..9 units, pushes intermediate
// points sideways by a random -5..+5 along the normal, and connects
// consecutive points with full-strength pixels.
void AnimateFireLightningBall(CTextureEffectSource *ptes)
{
  FireLightningBall &flb =
    (*((FireLightningBall *) ptes->tes_tespEffectSourceProperties.tesp_achDummy));

  for (int iBolt=0; iBolt<FIREBALL_LIGHTNINGS; iBolt++) {
    // last point -> starting point
    FLOAT fPrevU = flb.fpixU;
    FLOAT fPrevV = flb.fpixV;

    // destination point
    const FLOAT fAngle = (FLOAT) RNDW/10000;
    const FLOAT fDestU = flb.fpixU + flb.fRadiusU*cos(fAngle);
    const FLOAT fDestV = flb.fpixV + flb.fRadiusV*sin(fAngle);
    const FLOAT fLength = sqrt((fDestU-fPrevU)*(fDestU-fPrevU)+
                               (fDestV-fPrevV)*(fDestV-fPrevV));
    // unit direction towards the destination and its normal
    const FLOAT fDirU = (fDestU-fPrevU)/fLength;
    const FLOAT fDirV = (fDestV-fPrevV)/fLength;
    const FLOAT fNormalU = -fDirV;
    const FLOAT fNormalV = fDirU;

    ULONG ulTravelled = 0;
    while ((FLOAT)ulTravelled<fLength) {
      FLOAT fNextU, fNextV;
      // go away from source point to destination point
      ulTravelled += (RNDW%5)+5;
      if ((FLOAT)ulTravelled>=fLength) {
        // reached the end: the last segment goes exactly to the destination
        fNextU = fDestU;
        fNextV = fDestV;
      } else {
        // point on the straight line ...
        fNextU = flb.fpixU + fDirU*(FLOAT)ulTravelled;
        fNextV = flb.fpixV + fDirV*(FLOAT)ulTravelled;
        // ... moved sideways along the normal
        const SLONG slSideways = (SLONG) (RNDW%11)-5;
        fNextU += fNormalU*(FLOAT)slSideways;
        fNextV += fNormalV*(FLOAT)slSideways;
      }

      // draw the segment: one pixel per unit of the longer axis
      FLOAT fStepU = fNextU-fPrevU;
      FLOAT fStepV = fNextV-fPrevV;
      FLOAT fLeft = (Abs(fStepU)>Abs(fStepV)) ? Abs(fStepU) : Abs(fStepV);
      fStepU = fStepU/fLeft;
      fStepV = fStepV/fLeft;
      for (; fLeft>0.0f; fLeft -= 1) {
        PutPixelUBYTE_FIRE((PIX) fPrevU, (PIX) fPrevV, 255);
        fPrevU += fStepU;
        fPrevV += fStepV;
      }

      // the next segment starts exactly at this point
      fPrevU = fNextU;
      fPrevV = fNextV;
    }
  }
}
///////////////// Fire Smoke
#define SMOKE_POINTS 50
// State of a smoke source; lives in tesp_achDummy.
struct FireSmoke{
  FLOAT fpixU;  // source column
  FLOAT fpixV;  // source row
};
// One smoke puff. Every puff is addressed at an even index of tes_atepPixels
// (the array is allocated with two entries per puff).
struct FireSmokePoint{
  FLOAT fpixU;    // column
  FLOAT fpixV;    // row
  FLOAT fSpeedV;  // rows moved up per frame (0 while resting at the source)
};
// Put the smoke source at (pixU0,pixV0) and line up SMOKE_POINTS resting
// puffs on the source row, spread a few columns to either side of the
// source. pixU1/pixV1 are not used.
void InitializeFireSmoke(CTextureEffectSource *ptes,
                         PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1)
{
  FireSmoke *pfs = (FireSmoke *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;
  pfs->fpixU = (FLOAT) pixU0;
  pfs->fpixV = (FLOAT) pixV0;

  // initialize smoke points
  ptes->tes_atepPixels.New(SMOKE_POINTS*2);
  for (INDEX iSlot=0; iSlot<SMOKE_POINTS*2; iSlot+=2) {
    FireSmokePoint &fspPuff = ((FireSmokePoint&) ptes->tes_atepPixels[iSlot]);
    // integer division: groups of neighbouring puffs share a column
    fspPuff.fpixU   = FLOAT (pixU0 + (iSlot-(SMOKE_POINTS))/8);
    fspPuff.fpixV   = FLOAT (pixV0);
    fspPuff.fSpeedV = 0.0f;
  }
}
// Animate the smoke puffs. A resting puff is drawn as a single pixel of
// random strength (0..127) and has a 1-in-256 chance per frame to start
// rising; a rising puff is drawn as a jittered 5x5 blob whose strength is
// random below a limit that grows with the puff's row.
void AnimateFireSmoke(CTextureEffectSource *ptes)
{
  // the heat limit of a rising puff reaches 32 at the bottom row
  const FLOAT fHeatPerRow = 32.0f / (FLOAT)_pixBufferHeight;
  FireSmoke *pfs = (FireSmoke *) ptes->tes_tespEffectSourceProperties.tesp_achDummy;

  // animate smoke points
  for (INDEX iSlot=0; iSlot<SMOKE_POINTS*2; iSlot+=2) {
    FireSmokePoint &fspPuff = ((FireSmokePoint&) ptes->tes_atepPixels[iSlot]);

    // NOTE(review): the jitter is meant to be -1, 0 or +1, but it is held in
    // UBYTE, so -1 is stored as 255. Behaviour is kept as is - confirm before
    // switching to a signed type.
    const UBYTE ubJitterU = RNDW%3 - 1;
    const UBYTE ubJitterV = RNDW%3 - 1;

    if (fspPuff.fSpeedV<0.1f) {
      // resting at the source
      PutPixelUBYTE_FIRE((PIX) fspPuff.fpixU, (PIX) fspPuff.fpixV, RNDW%128);
    } else {
      // rising
      const int iHeatLimit = int(fspPuff.fpixV*fHeatPerRow+1);
      PutPixel25UBYTE_FIRE((PIX) fspPuff.fpixU+ubJitterU, (PIX) fspPuff.fpixV+ubJitterV,
                           RNDW%iHeatLimit);
    }

    // start moving up
    if (fspPuff.fSpeedV<0.1f && (RNDW&255)==0) {
      fspPuff.fSpeedV = 1.0f;
    }
    // move up
    fspPuff.fpixV -= fspPuff.fSpeedV;

    // at the end of texture go on bottom
    // NOTE(review): this test is true for every row inside the buffer, so a
    // puff is sent back to the source in the same frame it starts rising.
    // A comparison against 0 (the top row) looks like what was intended.
    // Behaviour is kept as is - confirm before changing.
    if (fspPuff.fpixV<=(FLOAT)_pixBufferHeight) {
      fspPuff.fpixV   = pfs->fpixV;
      fspPuff.fSpeedV = 0.0f;
    }
  }
}
///////////////// Water
void InitializeWater(void)
{
Randomize( (ULONG)(_pTimer->GetHighPrecisionTimer().GetMilliseconds()));
}
/*******************************
Water Animation
********************************/
// Advance the water height map of _ptdEffect by one simulation step.
//
// The two height maps (SWORD per cell) live in td_pubBuffer1 ("new") and
// td_pubBuffer2 ("old"). For every cell:
//   iNew = (sum of the 4 direct neighbours in pOld) / 2 - pNew[cell]
//   pNew[cell] = iNew - (iNew >> slDensity)
// so slDensity controls the damping: the larger it is, the smaller the part
// of the height that is removed each step. At the end the two buffer
// pointers are swapped. The whole step is measured by the
// STI_EFFECTRENDER timer.
//
// The map is processed in pieces: one linear pass without explicit
// wrap-around, then the top and bottom rows and the four corners, whose
// neighbours wrap around to the opposite side of the map.
static void AnimateWater( SLONG slDensity)
{
_sfStats.StartTimer(CStatForm::STI_EFFECTRENDER);
/////////////////////////////////// move water
SWORD *pNew = (SWORD*)_ptdEffect->td_pubBuffer1;
SWORD *pOld = (SWORD*)_ptdEffect->td_pubBuffer2;
PIX pixV, pixU;
PIX pixOffset, iNew;
SLONG slLineAbove, slLineBelow, slLineLeft, slLineRight;
// inner rectangle (without 1 pixel top and bottom line)
// The pass is linear: it starts at offset width+1 and handles
// width*(height-2) consecutive cells, taking the left/right neighbours at
// offset -1/+1 and the upper/lower ones at offset -width/+width.
pixOffset = _pixBufferWidth + 1;
for( pixV=_pixBufferHeight-2; pixV>0; pixV--) {
for( pixU=_pixBufferWidth; pixU>0; pixU--) {
iNew = (( (SLONG)pOld[pixOffset - _pixBufferWidth]
+ (SLONG)pOld[pixOffset + _pixBufferWidth]
+ (SLONG)pOld[pixOffset - 1]
+ (SLONG)pOld[pixOffset + 1]
) >> 1)
- (SLONG)pNew[pixOffset];
pNew[pixOffset] = iNew - (iNew >> slDensity);
pixOffset++;
}
}
// upper horizontal border (without corners)
// "above" wraps around to the last row of the map
slLineAbove = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
slLineBelow = _pixBufferWidth + 1;
slLineLeft = 0;
slLineRight = 2;
pixOffset = 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
iNew = (( (SLONG)pOld[slLineAbove]
+ (SLONG)pOld[slLineBelow]
+ (SLONG)pOld[slLineLeft]
+ (SLONG)pOld[slLineRight]
) >> 1)
- (SLONG)pNew[pixOffset];
pNew[pixOffset] = iNew - (iNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// lower horizontal border (without corners)
// "below" wraps around to the first row of the map
slLineAbove = ((_pixBufferHeight-2)*_pixBufferWidth) + 1;
slLineBelow = 1;
slLineLeft = (_pixBufferHeight-1)*_pixBufferWidth;
slLineRight = ((_pixBufferHeight-1)*_pixBufferWidth) + 2;
pixOffset = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
iNew = (( (SLONG)pOld[slLineAbove]
+ (SLONG)pOld[slLineBelow]
+ (SLONG)pOld[slLineLeft]
+ (SLONG)pOld[slLineRight]
) >> 1)
- (SLONG)pNew[pixOffset];
pNew[pixOffset] = iNew - (iNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// top-left corner (offset 0): neighbours below, above (wrapped), right, left (wrapped)
iNew = (( (SLONG)pOld[_pixBufferWidth]
+ (SLONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth]
+ (SLONG)pOld[1]
+ (SLONG)pOld[_pixBufferWidth-1]
) >> 1)
- (SLONG)pNew[0];
pNew[0] = iNew - (iNew >> slDensity);
// top-right corner (offset width-1): below, above (wrapped), right (wrapped), left
iNew = (( (SLONG)pOld[(2*_pixBufferWidth) - 1]
+ (SLONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
+ (SLONG)pOld[0]
+ (SLONG)pOld[_pixBufferWidth-2]
) >> 1)
- (SLONG)pNew[_pixBufferWidth-1];
pNew[_pixBufferWidth-1] = iNew - (iNew >> slDensity);
// bottom-left corner (offset (height-1)*width): below (wrapped), above, right, left (wrapped)
iNew = (( (SLONG)pOld[0]
+ (SLONG)pOld[(_pixBufferHeight-2)*_pixBufferWidth]
+ (SLONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) + 1]
+ (SLONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
) >> 1)
- (SLONG)pNew[(_pixBufferHeight-1)*_pixBufferWidth];
pNew[(_pixBufferHeight-1)*_pixBufferWidth] = iNew - (iNew >> slDensity);
// bottom-right corner (offset height*width-1): below (wrapped), above, right (wrapped), left
iNew = (( (SLONG)pOld[_pixBufferWidth-1]
+ (SLONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) - 1]
+ (SLONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth]
+ (SLONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 2]
) >> 1)
- (SLONG)pNew[(_pixBufferHeight*_pixBufferWidth) - 1];
pNew[(_pixBufferHeight*_pixBufferWidth) - 1] = iNew - (iNew >> slDensity);
// swap buffers
Swap( _ptdEffect->td_pubBuffer1, _ptdEffect->td_pubBuffer2);
_sfStats.StopTimer(CStatForm::STI_EFFECTRENDER);
}
//////////////////////////// displace texture
// Fetch the base-texture texel at (u,v), wrapping both coordinates with the
// base-size masks. Expects pulTextureBase and pixBaseWidth in the calling scope.
// The 64-bit mask globals are converted by value (not re-read through an
// SLONG reference), which avoids a strict-aliasing violation and does not
// depend on byte order.
#define PIXEL(u,v) pulTextureBase[ ((u)&(SLONG)mmBaseWidthMask) + ((v)&(SLONG)mmBaseHeightMask) *pixBaseWidth]
// Globals used by the GNU inline-assembly sub-sampling path of RenderWater(),
// which addresses them by symbol through ASMSYM() because it ran out of
// registers. RenderWater() copies its locals into them before the asm runs.
// NOTE(review): these look deliberately non-static so the symbols survive for
// the inline asm to link against - confirm before changing linkage or names.
ULONG _slHeightMapStep_renderWater = 0;
PIX _pixBaseWidth_renderWater = 0;
#pragma warning(disable: 4731)
static void RenderWater(void)
{
_sfStats.StartTimer(CStatForm::STI_EFFECTRENDER);
// get textures' parameters
ULONG *pulTexture = _ptdEffect->td_pulFrames;
PIX pixBaseWidth = _ptdBase->GetPixWidth();
PIX pixBaseHeight = _ptdBase->GetPixHeight();
ULONG *pulTextureBase = _ptdBase->td_pulFrames
+ GetMipmapOffset( _iWantedMipLevel, pixBaseWidth, pixBaseHeight);
pixBaseWidth >>= _iWantedMipLevel;
pixBaseHeight >>= _iWantedMipLevel;
mmBaseWidthMask = pixBaseWidth -1;
mmBaseHeightMask = pixBaseHeight-1;
ASSERT( _ptdEffect->td_pulFrames!=NULL && _ptdBase->td_pulFrames!=NULL);
SWORD *pswHeightMap = (SWORD*)_ptdEffect->td_pubBuffer1; // height map pointer
// copy top 2 lines from height map to bottom (so no mask offset will be needed)
memcpy( (void*)(pswHeightMap+(_pixBufferHeight*_pixBufferWidth)), (void*)pswHeightMap,
_pixBufferWidth*sizeof(SWORD)*2);
// execute corresponding displace routine
if( _pixBufferWidth >= _pixTexWidth)
{ // SUB-SAMPLING
SLONG slHeightMapStep, slHeightRowStep;
#if ASMOPT == 1
#if (defined __MSVC_INLINE__)
__asm {
push ebx
bsf ecx,D [_pixTexWidth]
dec ecx
mov eax,D [_pixBufferWidth]
sar eax,cl
mov D [slHeightMapStep],eax
bsf edx,eax
add edx,DISTORTION+2-1
mov D [mmShift],edx
sub eax,2
imul eax,D [_pixBufferWidth]
mov D [slHeightRowStep],eax
mov eax,D [pixBaseWidth]
mov edx,D [pixBaseHeight]
shl edx,16
or eax,edx
sub eax,0x00010001
mov D [mmBaseMasks],eax
mov eax,D [pixBaseWidth]
shl eax,16
or eax,1
mov D [mmBaseWidth],eax
mov ebx,D [pswHeightMap]
mov esi,D [pulTextureBase]
mov edi,D [pulTexture]
pxor mm6,mm6 // MM5 = 0 | 0 || pixV | pixU
mov eax,D [_pixBufferWidth]
mov edx,D [_pixTexHeight]
rowLoop:
push edx
mov ecx,D [_pixTexWidth]
pixLoop:
movd mm1,D [ebx]
movd mm3,D [ebx+ eax*2]
movq mm2,mm1
psubw mm3,mm1
pslld mm1,16
psubw mm2,mm1
pand mm2,Q [mm00M0]
por mm2,mm3
psraw mm2,Q [mmShift]
paddw mm2,mm6
pand mm2,Q [mmBaseMasks]
pmaddwd mm2,Q [mmBaseWidth]
movd edx,mm2
mov edx,D [esi+ edx*4]
mov D [edi],edx
// advance to next texture pixel
add ebx,D [slHeightMapStep]
add edi,4
paddd mm6,Q [mm0001]
dec ecx
jnz pixLoop
// advance to next texture row
pop edx
add ebx,D [slHeightRowStep]
paddd mm6,Q [mm0010]
dec edx
jnz rowLoop
emms
pop ebx
}
#elif (defined __GNU_INLINE__)
// rcg12152001 needed extra registers. :(
_slHeightMapStep_renderWater = slHeightMapStep;
_pixBaseWidth_renderWater = pixBaseWidth;
__asm__ __volatile__ (
"pushl %%ebx \n\t" // GCC needs this.
"movl (" ASMSYM(_pixBaseWidth_renderWater) "),%%ebx \n\t"
"pushl %%eax \n\t" // pixBaseHeight
"pushl %%ebx \n\t" // pixBaseWidth
"pushl %%ecx \n\t" // pswHeightMap
"pushl %%edx \n\t" // pulTexture
"pushl %%esi \n\t" // pulTextureBase
"pushl %%edi \n\t" // slHeightRowStep
"bsfl (" ASMSYM(_pixTexWidth) "), %%ecx \n\t"
"decl %%ecx \n\t"
"movl (" ASMSYM(_pixBufferWidth) "), %%eax \n\t"
"sarl %%cl, %%eax \n\t"
"movl %%eax, (" ASMSYM(_slHeightMapStep_renderWater) ") \n\t"
"bsfl %%eax, %%edx \n\t"
"addl $4, %%edx \n\t"
"movl %%edx, (" ASMSYM(mmShift) ") \n\t"
"subl $2, %%eax \n\t"
"imul (" ASMSYM(_pixBufferWidth) "), %%eax \n\t"
"movl %%eax, (%%esp) \n\t" // slHeightRowStep
"movl 16(%%esp), %%eax \n\t" // pixBaseWidth
"movl 20(%%esp), %%edx \n\t" // pixBaseHeight
"shll $16, %%edx \n\t"
"orl %%edx, %%eax \n\t"
"subl $0x00010001, %%eax \n\t"
"movl %%eax, (" ASMSYM(mmBaseMasks) ") \n\t"
"movl 16(%%esp), %%eax \n\t" // pixBaseWidth
"shl $16, %%eax \n\t"
"orl $1, %%eax \n\t"
"movl %%eax, (" ASMSYM(mmBaseWidth) ") \n\t"
"movl 12(%%esp), %%ebx \n\t" // pswHeightMap
"movl 4(%%esp), %%esi \n\t" // pulTextureBase
"movl 8(%%esp), %%edi \n\t" // pulTexture
"pxor %%mm6, %%mm6 \n\t" // MM5 = 0 | 0 || pixV | pixU
"movl (" ASMSYM(_pixBufferWidth) "), %%eax \n\t"
"movl (" ASMSYM(_pixTexHeight) "), %%edx \n\t"
"0: \n\t" // rowLoop
"pushl %%edx \n\t"
"movl (" ASMSYM(_pixTexWidth) "), %%ecx \n\t"
"1: \n\t" // pixLoop
"movd (%%ebx), %%mm1 \n\t"
"movd (%%ebx, %%eax, 2), %%mm3 \n\t"
"movq %%mm1, %%mm2 \n\t"
"psubw %%mm1, %%mm3 \n\t"
"pslld $16, %%mm1 \n\t"
"psubw %%mm1, %%mm2 \n\t"
"pand (" ASMSYM(mm00M0) "), %%mm2 \n\t"
"por %%mm3, %%mm2 \n\t"
"psraw (" ASMSYM(mmShift) "), %%mm2 \n\t"
"paddw %%mm6, %%mm2 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm2 \n\t"
"pmaddwd (" ASMSYM(mmBaseWidth) "), %%mm2 \n\t"
"movd %%mm2, %%edx \n\t"
"movl (%%esi, %%edx, 4), %%edx \n\t"
"movl %%edx, (%%edi) \n\t"
// advance to next texture pixel
"addl (" ASMSYM(_slHeightMapStep_renderWater) "), %%ebx \n\t"
"addl $4, %%edi \n\t"
"paddd (" ASMSYM(mm0001) "), %%mm6 \n\t"
"decl %%ecx \n\t"
"jnz 1b \n\t" // pixLoop
// advance to next texture row
"popl %%edx \n\t"
"addl (%%esp), %%ebx \n\t" // slHeightRowStep
"paddd (" ASMSYM(mm0010) "), %%mm6 \n\t"
"decl %%edx \n\t"
"jnz 0b \n\t" // rowLoop
"addl $24, %%esp \n\t" // lose our locals...
"popl %%ebx \n\t" // restore GCC's register.
"emms \n\t"
: // no outputs.
: "a" (pixBaseHeight), "c" (pswHeightMap),
"d" (pulTexture), "S" (pulTextureBase), "D" (slHeightRowStep)
: "cc", "memory"
);
#else
#error fill in for your platform.
#endif
#else
PIX pixPos, pixDU, pixDV;
slHeightMapStep = _pixBufferWidth/pixBaseWidth;
slHeightRowStep = (slHeightMapStep-1)*_pixBufferWidth;
mmShift = DISTORTION+ FastLog2(slHeightMapStep) +2;
for( PIX pixV=0; pixV<_pixTexHeight; pixV++)
{ // row loop
for( PIX pixU=0; pixU<_pixTexWidth; pixU++)
{ // texel loop
pixPos = pswHeightMap[0];
pixDU = (pswHeightMap[1] - pixPos) >>(SLONG&)mmShift;
pixDV = (pswHeightMap[_pixBufferWidth] - pixPos) >>(SLONG&)mmShift;
pixDU = (pixU +pixDU) & (SLONG&)mmBaseWidthMask;
pixDV = (pixV +pixDV) & (SLONG&)mmBaseHeightMask;
*pulTexture++ = pulTextureBase[pixDV*pixBaseWidth + pixDU];
// advance to next texel in height map
pswHeightMap += slHeightMapStep;
}
pswHeightMap += slHeightRowStep;
}
#endif
}
else if( _pixBufferWidth*2 == _pixTexWidth)
{ // BILINEAR SUPER-SAMPLING 2
#if ASMOPT == 1
#if (defined __MSVC_INLINE__)
__asm {
push ebx
bsf eax,D [pixBaseWidth]
mov edx,32
sub edx,eax
mov D [mmBaseWidthShift],edx
movq mm0,Q [mmBaseHeightMask]
psllq mm0,32
por mm0,Q [mmBaseWidthMask]
movq Q [mmBaseMasks],mm0
pxor mm6,mm6 // MM6 = pixV|pixU
mov ebx,D [pswHeightMap]
mov esi,D [pulTextureBase]
mov edi,D [pulTexture]
mov edx,D [_pixBufferHeight]
rowLoop2:
push edx
mov edx,D [_pixTexWidth]
mov ecx,D [_pixBufferWidth]
pixLoop2:
mov eax,D [_pixBufferWidth]
movd mm1,D [ebx+ 2]
movd mm0,D [ebx+ eax*2]
psllq mm0,32
por mm1,mm0
movd mm0,D [ebx]
punpckldq mm0,mm0
psubd mm1,mm0
movq mm0,mm6
pslld mm0,DISTORTION+1+1
paddd mm1,mm0 // MM1 = slV_00 | slU_00
movd mm2,D [ebx+ 4]
movd mm0,D [ebx+ eax*2 +2]
psllq mm0,32
por mm2,mm0
movd mm0,D [ebx+ 2]
punpckldq mm0,mm0
psubd mm2,mm0
movq mm0,mm6
paddd mm0,Q [mm1LO]
pslld mm0,DISTORTION+1+1
paddd mm2,mm0 // MM2 = slV_01 | slU_01
movd mm3,D [ebx+ eax*2 +2]
movd mm0,D [ebx+ eax*4]
psllq mm0,32
por mm3,mm0
movd mm0,D [ebx+ eax*2]
punpckldq mm0,mm0
psubd mm3,mm0
movq mm0,mm6
paddd mm0,Q [mm1HI]
pslld mm0,DISTORTION+1+1
paddd mm3,mm0 // MM3 = slV_10 | slU_10
movd mm4,D [ebx+ eax*2 +4]
movd mm0,D [ebx+ eax*4 +2]
psllq mm0,32
por mm4,mm0
movd mm0,D [ebx+ eax*2 +2]
punpckldq mm0,mm0
psubd mm4,mm0
movq mm0,mm6
paddd mm0,Q [mm1HILO]
pslld mm0,DISTORTION+1+1
paddd mm4,mm0 // MM4 = slV_11 | slU_11
movq mm0,mm1
psrad mm0,DISTORTION+1+0
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi],eax
movq mm0,mm1
paddd mm0,mm2
psrad mm0,DISTORTION+1+1
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ 4],eax
movq mm0,mm1
paddd mm0,mm3
psrad mm0,DISTORTION+1+1
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ edx*4],eax
paddd mm1,mm2
paddd mm1,mm3
paddd mm1,mm4
psrad mm1,DISTORTION+1+2
pand mm1,Q [mmBaseMasks]
movq mm7,mm1
psrlq mm7,Q [mmBaseWidthShift]
paddd mm1,mm7
movd eax,mm1
mov eax,D [esi+ eax*4]
mov D [edi+ edx*4 +4],eax
// advance to next texture pixels
paddd mm6,Q [mm1LO]
add edi,8
add ebx,2
dec ecx
jnz pixLoop2
// advance to next texture row
lea edi,[edi+ edx*4]
pop edx
paddd mm6,Q [mm1HI]
dec edx
jnz rowLoop2
emms
pop ebx
}
#elif (defined __GNU_INLINE__)
__asm__ __volatile__ (
"pushl %%ebx \n\t" // GCC's register.
"movl %%ecx, %%ebx \n\t"
"bsfl %%eax, %%eax \n\t" // pixBaseWidth
"movl $32, %%edx \n\t"
"subl %%eax, %%edx \n\t"
"movl %%edx, (" ASMSYM(mmBaseWidthShift) ") \n\t"
"movq (" ASMSYM(mmBaseHeightMask) "), %%mm0 \n\t"
"psllq $32, %%mm0 \n\t"
"por (" ASMSYM(mmBaseWidthMask) "), %%mm0 \n\t"
"movq %%mm0, (" ASMSYM(mmBaseMasks) ") \n\t"
"pxor %%mm6, %%mm6 \n\t" // MM6 = pixV|pixU
// (These registers were loaded here in the original version...)
//"movl (pswHeightMap), %%ebx \n\t"
//"movl (pulTextureBase), %%esi \n\t"
//"movl (pulTexture), %%edi \n\t"
"movl (" ASMSYM(_pixBufferHeight) "), %%edx \n\t"
"0: \n\t" // rowLoop2
"pushl %%edx \n\t"
"movl (" ASMSYM(_pixTexWidth) "), %%edx \n\t"
"movl (" ASMSYM(_pixBufferWidth) "), %%ecx \n\t"
"1: \n\t" // pixLoop2
"mov (" ASMSYM(_pixBufferWidth) "), %%eax \n\t"
"movd 2(%%ebx), %%mm1 \n\t"
"movd 0(%%ebx, %%eax, 2), %%mm0 \n\t"
"psllq $32, %%mm0 \n\t"
"por %%mm0, %%mm1 \n\t"
"movd (%%ebx), %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"psubd %%mm0, %%mm1 \n\t"
"movq %%mm6, %%mm0 \n\t"
"pslld $5, %%mm0 \n\t"
"paddd %%mm0, %%mm1 \n\t" // MM1 = slV_00 | slU_00
"movd 4(%%ebx), %%mm2 \n\t"
"movd 2(%%ebx, %%eax, 2), %%mm0 \n\t"
"psllq $32, %%mm0 \n\t"
"por %%mm0, %%mm2 \n\t"
"movd 2(%%ebx), %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"psubd %%mm0, %%mm2 \n\t"
"movq %%mm6, %%mm0 \n\t"
"paddd (" ASMSYM(mm1LO) "), %%mm0 \n\t"
"pslld $5, %%mm0 \n\t"
"paddd %%mm0, %%mm2 \n\t" // MM2 = slV_01 | slU_01
"movd 2(%%ebx, %%eax, 2), %%mm3 \n\t"
"movd (%%ebx, %%eax, 4), %%mm0 \n\t"
"psllq $32, %%mm0 \n\t"
"por %%mm0, %%mm3 \n\t"
"movd (%%ebx, %%eax, 2), %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"psubd %%mm0, %%mm3 \n\t"
"movq %%mm6, %%mm0 \n\t"
"paddd (" ASMSYM(mm1HI) "), %%mm0 \n\t"
"pslld $5, %%mm0 \n\t"
"paddd %%mm0, %%mm3 \n\t" // MM3 = slV_10 | slU_10
"movd 4(%%ebx, %%eax, 2), %%mm4 \n\t"
"movd 2(%%ebx, %%eax, 4), %%mm0 \n\t"
"psllq $32, %%mm0 \n\t"
"por %%mm0, %%mm4 \n\t"
"movd 2(%%ebx, %%eax, 2), %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"psubd %%mm0, %%mm4 \n\t"
"movq %%mm6, %%mm0 \n\t"
"paddd (" ASMSYM(mm1HILO) "), %%mm0 \n\t"
"pslld $5, %%mm0 \n\t"
"paddd %%mm0, %%mm4 \n\t" // MM4 = slV_11 | slU_11
"movq %%mm1, %%mm0 \n\t"
"psrad $4, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, (%%edi) \n\t"
"movq %%mm1, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"psrad $5, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, 4(%%edi) \n\t"
"movq %%mm1, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"psrad $5, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, (%%edi, %%edx, 4) \n\t"
"paddd %%mm2, %%mm1 \n\t"
"paddd %%mm3, %%mm1 \n\t"
"paddd %%mm4, %%mm1 \n\t"
"psrad $6, %%mm1 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm1 \n\t"
"movq %%mm1, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm1 \n\t"
"movd %%mm1, %%eax \n\t"
"mov (%%esi, %%eax, 4), %%eax \n\t"
"mov %%eax, 4(%%edi, %%edx, 4) \n\t"
// advance to next texture pixels
"paddd (" ASMSYM(mm1LO) "), %%mm6 \n\t"
"addl $8, %%edi \n\t"
"addl $2, %%ebx \n\t"
"decl %%ecx \n\t"
"jnz 1b \n\t" // pixLoop2
// advance to next texture row
"leal (%%edi, %%edx, 4), %%edi \n\t"
"popl %%edx \n\t"
"paddd (" ASMSYM(mm1HI) "), %%mm6 \n\t"
"decl %%edx \n\t"
"jnz 0b \n\t" // rowLoop2
"popl %%ebx \n\t" // GCC's value.
"emms \n\t"
: // no outputs.
: "a" (pixBaseWidth), "c" (pswHeightMap),
"S" (pulTextureBase), "D" (pulTexture)
: "edx", "cc", "memory"
);
#else
#error fill in for you platform.
#endif
#else
SLONG slU_00, slU_01, slU_10, slU_11;
SLONG slV_00, slV_01, slV_10, slV_11;
for( PIX pixV=0; pixV<_pixBufferHeight; pixV++)
{ // row loop
for( PIX pixU=0; pixU<_pixBufferWidth; pixU++)
{ // texel loop
slU_00 = pswHeightMap[_pixBufferWidth*0+1] - pswHeightMap[_pixBufferWidth*0+0] + ((pixU+0)<<(DISTORTION+1+1));
slV_00 = pswHeightMap[_pixBufferWidth*1+0] - pswHeightMap[_pixBufferWidth*0+0] + ((pixV+0)<<(DISTORTION+1+1));
slU_01 = pswHeightMap[_pixBufferWidth*0+2] - pswHeightMap[_pixBufferWidth*0+1] + ((pixU+1)<<(DISTORTION+1+1));
slV_01 = pswHeightMap[_pixBufferWidth*1+1] - pswHeightMap[_pixBufferWidth*0+1] + ((pixV+0)<<(DISTORTION+1+1));
slU_10 = pswHeightMap[_pixBufferWidth*1+1] - pswHeightMap[_pixBufferWidth*1+0] + ((pixU+0)<<(DISTORTION+1+1));
slV_10 = pswHeightMap[_pixBufferWidth*2+0] - pswHeightMap[_pixBufferWidth*1+0] + ((pixV+1)<<(DISTORTION+1+1));
slU_11 = pswHeightMap[_pixBufferWidth*1+2] - pswHeightMap[_pixBufferWidth*1+1] + ((pixU+1)<<(DISTORTION+1+1));
slV_11 = pswHeightMap[_pixBufferWidth*2+1] - pswHeightMap[_pixBufferWidth*1+1] + ((pixV+1)<<(DISTORTION+1+1));
pulTexture[_pixTexWidth*0+0] = PIXEL( (slU_00 ) >>(DISTORTION+1 ), (slV_00 ) >>(DISTORTION+1 ) );
pulTexture[_pixTexWidth*0+1] = PIXEL( (slU_00+slU_01 ) >>(DISTORTION+1+1), (slV_00+slV_01 ) >>(DISTORTION+1+1) );
pulTexture[_pixTexWidth*1+0] = PIXEL( (slU_00 +slU_10 ) >>(DISTORTION+1+1), (slV_00 +slV_10 ) >>(DISTORTION+1+1) );
pulTexture[_pixTexWidth*1+1] = PIXEL( (slU_00+slU_01+slU_10+slU_11) >>(DISTORTION+1+2), (slV_00+slV_01+slV_10+slV_11) >>(DISTORTION+1+2) );
// advance to next texel
pulTexture+=2;
pswHeightMap++;
}
pulTexture+=_pixTexWidth;
}
#endif
}
else if( _pixBufferWidth*4 == _pixTexWidth)
{ // BILINEAR SUPER-SAMPLING 4
#if ASMOPT == 1
#if (defined __MSVC_INLINE__)
__asm {
push ebx
bsf eax,D [pixBaseWidth]
mov edx,32
sub edx,eax
mov D [mmBaseWidthShift],edx
movq mm0,Q [mmBaseHeightMask]
psllq mm0,32
por mm0,Q [mmBaseWidthMask]
movq Q [mmBaseMasks],mm0
pxor mm6,mm6 // MM6 = pixV|pixU
mov ebx,D [pswHeightMap]
mov esi,D [pulTextureBase]
mov edi,D [pulTexture]
mov edx,D [_pixBufferHeight]
rowLoop4:
push edx
mov ecx,D [_pixBufferWidth]
pixLoop4:
mov eax,D [_pixBufferWidth]
mov edx,D [_pixTexWidth]
movd mm1,D [ebx+ 2]
movd mm0,D [ebx+ eax*2]
psllq mm0,32
por mm1,mm0
movd mm0,D [ebx]
punpckldq mm0,mm0
psubd mm1,mm0
movq mm0,mm6
pslld mm0,DISTORTION+1+1
paddd mm1,mm0 // MM1 = slV_00 | slU_00
movd mm2,D [ebx+ 4]
movd mm0,D [ebx+ eax*2 +2]
psllq mm0,32
por mm2,mm0
movd mm0,D [ebx+ 2]
punpckldq mm0,mm0
psubd mm2,mm0
movq mm0,mm6
paddd mm0,Q [mm1LO]
pslld mm0,DISTORTION+1+1
paddd mm2,mm0 // MM2 = slV_01 | slU_01
movd mm3,D [ebx+ eax*2 +2]
movd mm0,D [ebx+ eax*4]
psllq mm0,32
por mm3,mm0
movd mm0,D [ebx+ eax*2]
punpckldq mm0,mm0
psubd mm3,mm0
movq mm0,mm6
paddd mm0,Q [mm1HI]
pslld mm0,DISTORTION+1+1
paddd mm3,mm0 // MM3 = slV_10 | slU_10
movd mm4,D [ebx+ eax*2 +4]
movd mm0,D [ebx+ eax*4 +2]
psllq mm0,32
por mm4,mm0
movd mm0,D [ebx+ eax*2 +2]
punpckldq mm0,mm0
psubd mm4,mm0
movq mm0,mm6
paddd mm0,Q [mm1HILO]
pslld mm0,DISTORTION+1+1
paddd mm4,mm0 // MM4 = slV_11 | slU_11
// texel 00
movq mm0,mm1
psrad mm0,DISTORTION
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi],eax
// texel 01
movq mm0,mm1
paddd mm0,mm1
paddd mm0,mm1
paddd mm0,mm2
psrad mm0,DISTORTION+2
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi +4],eax
// texel 02
movq mm0,mm1
paddd mm0,mm2
psrad mm0,DISTORTION+1
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi +8],eax
// texel 03
movq mm0,mm1
paddd mm0,mm2
paddd mm0,mm2
paddd mm0,mm2
psrad mm0,DISTORTION+2
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi +12],eax
// texel 10
movq mm0,mm1
paddd mm0,mm1
paddd mm0,mm1
paddd mm0,mm3
psrad mm0,DISTORTION+2
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ edx*4],eax
// texel 11
movq mm0,mm1
pslld mm0,3
paddd mm0,mm1
paddd mm0,mm2
paddd mm0,mm2
paddd mm0,mm2
paddd mm0,mm3
paddd mm0,mm3
paddd mm0,mm3
paddd mm0,mm4
psrad mm0,DISTORTION+4
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ edx*4 +4],eax
// texel 12
movq mm0,mm1
paddd mm0,mm0
paddd mm0,mm1
paddd mm0,mm2
paddd mm0,mm2
paddd mm0,mm2
paddd mm0,mm3
paddd mm0,mm4
psrad mm0,DISTORTION+3
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ edx*4 +8],eax
// texel 13
movq mm0,mm2
pslld mm0,3
paddd mm0,mm2
paddd mm0,mm1
paddd mm0,mm1
paddd mm0,mm1
paddd mm0,mm3
paddd mm0,mm4
paddd mm0,mm4
paddd mm0,mm4
psrad mm0,DISTORTION+4
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ edx*4 +12],eax
// texel 20
movq mm0,mm1
paddd mm0,mm3
psrad mm0,DISTORTION+1
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ edx*8],eax
// texel 21
movq mm0,mm1
paddd mm0,mm1
paddd mm0,mm1
paddd mm0,mm2
paddd mm0,mm3
paddd mm0,mm3
paddd mm0,mm3
paddd mm0,mm4
psrad mm0,DISTORTION+3
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ edx*8 +4],eax
// texel 22
movq mm0,mm1
paddd mm0,mm2
paddd mm0,mm3
paddd mm0,mm4
psrad mm0,DISTORTION+2
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ edx*8 +8],eax
// texel 23
movq mm0,mm1
paddd mm0,mm2
paddd mm0,mm2
paddd mm0,mm2
paddd mm0,mm3
paddd mm0,mm4
paddd mm0,mm4
paddd mm0,mm4
psrad mm0,DISTORTION+3
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ edx*8 +12],eax
imul edx,3 // _pixTexWidth*=3
// texel 30
movq mm0,mm1
paddd mm0,mm3
paddd mm0,mm3
paddd mm0,mm3
psrad mm0,DISTORTION+2
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ edx*4],eax
// texel 31
movq mm0,mm3
pslld mm0,3
paddd mm0,mm3
paddd mm0,mm1
paddd mm0,mm1
paddd mm0,mm1
paddd mm0,mm2
paddd mm0,mm4
paddd mm0,mm4
paddd mm0,mm4
psrad mm0,DISTORTION+4
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ edx*4 +4],eax
// texel 32
movq mm0,mm4
paddd mm0,mm0
paddd mm0,mm4
paddd mm0,mm3
paddd mm0,mm3
paddd mm0,mm3
paddd mm0,mm2
paddd mm0,mm1
psrad mm0,DISTORTION+3
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ edx*4 +8],eax
// texel 33
movq mm0,mm4
pslld mm0,3
paddd mm0,mm4
paddd mm0,mm1
paddd mm0,mm2
paddd mm0,mm2
paddd mm0,mm2
paddd mm0,mm3
paddd mm0,mm3
paddd mm0,mm3
psrad mm0,DISTORTION+4
pand mm0,Q [mmBaseMasks]
movq mm7,mm0
psrlq mm7,Q [mmBaseWidthShift]
paddd mm0,mm7
movd eax,mm0
mov eax,D [esi+ eax*4]
mov D [edi+ edx*4 +12],eax
// advance to next texture pixels
paddd mm6,Q [mm1LO]
add edi,16
add ebx,2
dec ecx
jnz pixLoop4
// advance to next texture row
lea edi,[edi+ edx*4] // +=[_pixTexWidth]*3
pop edx
paddd mm6,Q [mm1HI]
dec edx
jnz rowLoop4
emms
pop ebx
}
#elif (defined __GNU_INLINE__)
__asm__ __volatile__ (
"pushl %%ebx \n\t" // GCC's register.
"movl %%ecx, %%ebx \n\t"
"bsfl %%eax, %%eax \n\t"
"movl $32, %%edx \n\t"
"subl %%eax, %%edx \n\t"
"movl %%edx, (" ASMSYM(mmBaseWidthShift) ") \n\t"
"movq (" ASMSYM(mmBaseHeightMask) "), %%mm0 \n\t"
"psllq $32, %%mm0 \n\t"
"por (" ASMSYM(mmBaseWidthMask) "), %%mm0 \n\t"
"movq %%mm0, (" ASMSYM(mmBaseMasks) ") \n\t"
"pxor %%mm6, %%mm6 \n\t" // MM6 = pixV|pixU
// (These registers were loaded here in the original version...)
//"movl (pswHeightMap), %%ebx \n\t"
//"movl (pulTextureBase), %%esi \n\t"
//"movl (pulTexture), %%edi \n\t"
"movl (" ASMSYM(_pixBufferHeight) "), %%edx \n\t"
"0: \n\t" // rowLoop4
"pushl %%edx \n\t"
"movl (" ASMSYM(_pixBufferWidth) "), %%ecx \n\t"
"1: \n\t" // pixLoop4
"movl (" ASMSYM(_pixBufferWidth) "), %%eax \n\t"
"movl (" ASMSYM(_pixTexWidth) "), %%edx \n\t"
"movd 2(%%ebx), %%mm1 \n\t"
"movd (%%ebx, %%eax, 2), %%mm0 \n\t"
"psllq $32, %%mm0 \n\t"
"por %%mm0, %%mm1 \n\t"
"movd (%%ebx), %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"psubd %%mm0, %%mm1 \n\t"
"movq %%mm6, %%mm0 \n\t"
"pslld $5, %%mm0 \n\t"
"paddd %%mm0, %%mm1 \n\t" // MM1 = slV_00 | slU_00
"movd 4(%%ebx), %%mm2 \n\t"
"movd 2(%%ebx, %%eax, 2), %%mm0 \n\t"
"psllq $32, %%mm0 \n\t"
"por %%mm0, %%mm2 \n\t"
"movd 2(%%ebx), %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"psubd %%mm0, %%mm2 \n\t"
"movq %%mm6, %%mm0 \n\t"
"paddd (" ASMSYM(mm1LO) "), %%mm0 \n\t"
"pslld $5, %%mm0 \n\t"
"paddd %%mm0, %%mm2 \n\t" // MM2 = slV_01 | slU_01
"movd 2(%%ebx, %%eax, 2), %%mm3 \n\t"
"movd (%%ebx, %%eax, 4), %%mm0 \n\t"
"psllq $32, %%mm0 \n\t"
"por %%mm0, %%mm3 \n\t"
"movd (%%ebx, %%eax, 2), %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"psubd %%mm0, %%mm3 \n\t"
"movq %%mm6, %%mm0 \n\t"
"paddd (" ASMSYM(mm1HI) "), %%mm0 \n\t"
"pslld $5, %%mm0 \n\t"
"paddd %%mm0, %%mm3 \n\t" // MM3 = slV_10 | slU_10
"movd 4(%%ebx, %%eax, 2), %%mm4 \n\t"
"movd 2(%%ebx, %%eax, 4), %%mm0 \n\t"
"psllq $32, %%mm0 \n\t"
"por %%mm0, %%mm4 \n\t"
"movd 2(%%ebx, %%eax, 2), %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"psubd %%mm0, %%mm4 \n\t"
"movq %%mm6, %%mm0 \n\t"
"paddd (" ASMSYM(mm1HILO) "), %%mm0 \n\t"
"pslld $5, %%mm0 \n\t"
"paddd %%mm0, %%mm4 \n\t" // MM4 = slV_11 | slU_11
// texel 00
"movq %%mm1, %%mm0 \n\t"
"psrad $3, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, (%%edi) \n\t"
// texel 01
"movq %%mm1, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"psrad $5, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, 4(%%edi) \n\t"
// texel 02
"movq %%mm1, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"psrad $4, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, 8(%%edi) \n\t"
// texel 03
"movq %%mm1, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"psrad $5, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, 12(%%edi) \n\t"
// texel 10
"movq %%mm1, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"psrad $5, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, (%%edi, %%edx, 4) \n\t"
// texel 11
"movq %%mm1, %%mm0 \n\t"
"pslld $3, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad $7, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, 4(%%edi, %%edx, 4) \n\t"
// texel 12
"movq %%mm1, %%mm0 \n\t"
"paddd %%mm0, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad $6, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, 8(%%edi, %%edx, 4) \n\t"
// texel 13
"movq %%mm2, %%mm0 \n\t"
"pslld $3, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad $7, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, 12(%%edi, %%edx, 4) \n\t"
// texel 20
"movq %%mm1, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"psrad $4, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, (%%edi, %%edx, 8) \n\t"
// texel 21
"movq %%mm1, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad $6, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, 4(%%edi, %%edx, 8) \n\t"
// texel 22
"movq %%mm1, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad $5, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, 8(%%edi, %%edx, 8) \n\t"
// texel 23
"movq %%mm1, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad $6, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, 12(%%edi, %%edx, 8) \n\t"
"imull $3, %%edx \n\t" // _pixTexWidth*=3
// texel 30
"movq %%mm1, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"psrad $5, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, (%%edi, %%edx, 4) \n\t"
// texel 31
"movq %%mm3, %%mm0 \n\t"
"pslld $3, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad $7, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, 4(%%edi, %%edx, 4) \n\t"
// texel 32
"movq %%mm4, %%mm0 \n\t"
"paddd %%mm0, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"psrad $6, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, 8(%%edi, %%edx, 4) \n\t"
// texel 33
"movq %%mm4, %%mm0 \n\t"
"pslld $3, %%mm0 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm2, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"paddd %%mm3, %%mm0 \n\t"
"psrad $7, %%mm0 \n\t"
"pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t"
"movq %%mm0, %%mm7 \n\t"
"psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"movd %%mm0, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, 12(%%edi, %%edx, 4) \n\t"
// advance to next texture pixels
"paddd (" ASMSYM(mm1LO) "), %%mm6 \n\t"
"addl $16, %%edi \n\t"
"addl $2, %%ebx \n\t"
"decl %%ecx \n\t"
"jnz 1b \n\t" // pixLoop4
// advance to next texture row
"leal (%%edi, %%edx, 4), %%edi \n\t"// +=[_pixTexWidth]*3
"popl %%edx \n\t"
"paddd (" ASMSYM(mm1HI) "), %%mm6 \n\t"
"decl %%edx \n\t"
"jnz 0b \n\t" // rowLoop4
"popl %%ebx \n\t" // Restore GCC's value.
"emms \n\t"
: // no outputs.
: "a" (pixBaseWidth), "c" (pswHeightMap),
"S" (pulTextureBase), "D" (pulTexture)
: "edx", "cc", "memory"
);
#else
#error fill in for you platform.
#endif
#else
SLONG slU_00, slU_01, slU_10, slU_11;
SLONG slV_00, slV_01, slV_10, slV_11;
mmBaseWidthShift = FastLog2( pixBaseWidth); // faster multiplying with shift
for( PIX pixV=0; pixV<_pixBufferHeight; pixV++)
{ // row loop
for( PIX pixU=0; pixU<_pixBufferWidth; pixU++)
{ // texel loop
slU_00 = pswHeightMap[_pixBufferWidth*0+1] - pswHeightMap[_pixBufferWidth*0+0] + ((pixU+0)<<(DISTORTION+2));
slV_00 = pswHeightMap[_pixBufferWidth*1+0] - pswHeightMap[_pixBufferWidth*0+0] + ((pixV+0)<<(DISTORTION+2));
slU_01 = pswHeightMap[_pixBufferWidth*0+2] - pswHeightMap[_pixBufferWidth*0+1] + ((pixU+1)<<(DISTORTION+2));
slV_01 = pswHeightMap[_pixBufferWidth*1+1] - pswHeightMap[_pixBufferWidth*0+1] + ((pixV+0)<<(DISTORTION+2));
slU_10 = pswHeightMap[_pixBufferWidth*1+1] - pswHeightMap[_pixBufferWidth*1+0] + ((pixU+0)<<(DISTORTION+2));
slV_10 = pswHeightMap[_pixBufferWidth*2+0] - pswHeightMap[_pixBufferWidth*1+0] + ((pixV+1)<<(DISTORTION+2));
slU_11 = pswHeightMap[_pixBufferWidth*1+2] - pswHeightMap[_pixBufferWidth*1+1] + ((pixU+1)<<(DISTORTION+2));
slV_11 = pswHeightMap[_pixBufferWidth*2+1] - pswHeightMap[_pixBufferWidth*1+1] + ((pixV+1)<<(DISTORTION+2));
pulTexture[_pixTexWidth*0+0] = PIXEL( (slU_00 ) >>(DISTORTION ), (slV_00 ) >>(DISTORTION ) );
pulTexture[_pixTexWidth*0+1] = PIXEL( (slU_00* 3+slU_01* 1 ) >>(DISTORTION+2), (slV_00* 3+slV_01* 1 ) >>(DISTORTION+2) );
pulTexture[_pixTexWidth*0+2] = PIXEL( (slU_00 +slU_01 ) >>(DISTORTION+1), (slV_00 +slV_01 ) >>(DISTORTION+1) );
pulTexture[_pixTexWidth*0+3] = PIXEL( (slU_00* 1+slU_01* 3 ) >>(DISTORTION+2), (slV_00* 1+slV_01* 3 ) >>(DISTORTION+2) );
pulTexture[_pixTexWidth*1+0] = PIXEL( (slU_00* 3 +slU_10* 1 ) >>(DISTORTION+2), (slV_00* 3 +slV_10 ) >>(DISTORTION+2) );
pulTexture[_pixTexWidth*1+1] = PIXEL( (slU_00* 9+slU_01* 3+slU_10* 3+slU_11* 1) >>(DISTORTION+4), (slV_00* 9+slV_01* 3+slV_10* 3+slV_11* 1) >>(DISTORTION+4) );
pulTexture[_pixTexWidth*1+2] = PIXEL( (slU_00* 3+slU_01* 3+slU_10* 1+slU_11* 1) >>(DISTORTION+3), (slV_00* 3+slV_01* 3+slV_10* 1+slV_11* 1) >>(DISTORTION+3) );
pulTexture[_pixTexWidth*1+3] = PIXEL( (slU_00* 3+slU_01* 9+slU_10* 1+slU_11* 3) >>(DISTORTION+4), (slV_00* 3+slV_01* 9+slV_10* 1+slV_11* 3) >>(DISTORTION+4) );
pulTexture[_pixTexWidth*2+0] = PIXEL( (slU_00 +slU_10 ) >>(DISTORTION+1), (slV_00 +slV_10 ) >>(DISTORTION+1) );
pulTexture[_pixTexWidth*2+1] = PIXEL( (slU_00* 3+slU_01* 1+slU_10* 3+slU_11* 1) >>(DISTORTION+3), (slV_00* 3+slV_01* 1+slV_10* 3+slV_11* 1) >>(DISTORTION+3) );
pulTexture[_pixTexWidth*2+2] = PIXEL( (slU_00 +slU_01 +slU_10 +slU_11 ) >>(DISTORTION+2), (slV_00 +slV_01 +slV_10 +slV_11 ) >>(DISTORTION+2) );
pulTexture[_pixTexWidth*2+3] = PIXEL( (slU_00* 1+slU_01* 3+slU_10* 1+slU_11* 3) >>(DISTORTION+3), (slV_00* 1+slV_01* 3+slV_10* 1+slV_11* 3) >>(DISTORTION+3) );
pulTexture[_pixTexWidth*3+0] = PIXEL( (slU_00* 1 +slU_10* 3 ) >>(DISTORTION+2), (slV_00* 1 +slV_10* 3 ) >>(DISTORTION+2) );
pulTexture[_pixTexWidth*3+1] = PIXEL( (slU_00* 3+slU_01* 1+slU_10* 9+slU_11* 3) >>(DISTORTION+4), (slV_00* 3+slV_01* 1+slV_10* 9+slV_11* 3) >>(DISTORTION+4) );
pulTexture[_pixTexWidth*3+2] = PIXEL( (slU_00* 1+slU_01* 1+slU_10* 3+slU_11* 3) >>(DISTORTION+3), (slV_00* 1+slV_01* 1+slV_10* 3+slV_11* 3) >>(DISTORTION+3) );
pulTexture[_pixTexWidth*3+3] = PIXEL( (slU_00* 1+slU_01* 3+slU_10* 3+slU_11* 9) >>(DISTORTION+4), (slV_00* 1+slV_01* 3+slV_10* 3+slV_11* 9) >>(DISTORTION+4) );
// advance to next texel
pulTexture+=4;
pswHeightMap++;
}
pulTexture+=_pixTexWidth*3;
}
#endif
}
else
{ // DO NOTHING
ASSERTALWAYS( "Effect textures larger than 256 pixels aren't supported");
}
_sfStats.StopTimer(CStatForm::STI_EFFECTRENDER);
}
#pragma warning(default: 4731)
///////////////// Fire
void InitializeFire(void)
{
Randomize( (ULONG)(_pTimer->GetHighPrecisionTimer().GetMilliseconds()));
}
// Plasma animation variants accepted by AnimatePlasma().
enum PlasmaType {
  ptNormal   = 0,  // result is stored at the pixel's own position
  ptUp       = 1,  // result is stored one row up
  ptUpTile   = 2,  // same as ptUp, border rows and corners are processed too
  ptDown     = 3,  // result is stored one row down
  ptDownTile = 4   // same as ptDown, border rows and corners are processed too
};
/*******************************
Plasma Animation
********************************/
static void AnimatePlasma( SLONG slDensity, PlasmaType eType)
{
_sfStats.StartTimer(CStatForm::STI_EFFECTRENDER);
/////////////////////////////////// move plasma
UBYTE *pNew = (UBYTE*)_ptdEffect->td_pubBuffer1;
UBYTE *pOld = (UBYTE*)_ptdEffect->td_pubBuffer2;
PIX pixV, pixU;
PIX pixOffset;
SLONG slLineAbove, slLineBelow, slLineLeft, slLineRight;
ULONG ulNew;
// --------------------------
// Normal plasma
// --------------------------
if (eType == ptNormal) {
// inner rectangle (without 1 pixel border)
pixOffset = _pixBufferWidth;
for( pixV=1; pixV<_pixBufferHeight-1; pixV++) {
for( pixU=0; pixU<_pixBufferWidth; pixU++) {
ulNew = ((((ULONG)pOld[pixOffset - _pixBufferWidth] +
(ULONG)pOld[pixOffset + _pixBufferWidth] +
(ULONG)pOld[pixOffset - 1] +
(ULONG)pOld[pixOffset + 1]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[pixOffset] = ulNew - (ulNew >> slDensity);
pixOffset++;
}
}
// upper horizontal border (without corners)
slLineAbove = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
slLineBelow = _pixBufferWidth + 1;
slLineLeft = 0;
slLineRight = 2;
pixOffset = 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
ulNew = ((((ULONG)pOld[slLineAbove] +
(ULONG)pOld[slLineBelow] +
(ULONG)pOld[slLineLeft] +
(ULONG)pOld[slLineRight]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[pixOffset] = ulNew - (ulNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// lower horizontal border (without corners)
slLineAbove = ((_pixBufferHeight-2)*_pixBufferWidth) + 1;
slLineBelow = 1;
slLineLeft = (_pixBufferHeight-1)*_pixBufferWidth;
slLineRight = ((_pixBufferHeight-1)*_pixBufferWidth) + 2;
pixOffset = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
ulNew = ((((ULONG)pOld[slLineAbove] +
(ULONG)pOld[slLineBelow] +
(ULONG)pOld[slLineLeft] +
(ULONG)pOld[slLineRight]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[pixOffset] = ulNew - (ulNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// corner ( 0, 0)
ulNew = ((((ULONG)pOld[_pixBufferWidth] +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] +
(ULONG)pOld[1] +
(ULONG)pOld[_pixBufferWidth-1]
)>>2) +
(ULONG)pOld[0]
)>>1;
pNew[0] = ulNew - (ulNew >> slDensity);
// corner ( 0, _pixBufferWidth)
ulNew = ((((ULONG)pOld[(2*_pixBufferWidth) - 1] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] +
(ULONG)pOld[0] +
(ULONG)pOld[_pixBufferWidth-2]
)>>2) +
(ULONG)pOld[_pixBufferWidth-1]
)>>1;
pNew[_pixBufferWidth-1] = ulNew - (ulNew >> slDensity);
// corner ( _pixBufferHeight, 0)
ulNew = ((((ULONG)pOld[0] +
(ULONG)pOld[(_pixBufferHeight-2)*_pixBufferWidth] +
(ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) + 1] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
)>>2) +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth]
)>>1;
pNew[(_pixBufferHeight-1)*_pixBufferWidth] = ulNew - (ulNew >> slDensity);
// corner ( _pixBufferHeight, _pixBufferWidth)
ulNew = ((((ULONG)pOld[_pixBufferWidth-1] +
(ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) - 1] +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 2]
)>>2) +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
)>>1;
pNew[(_pixBufferHeight*_pixBufferWidth) - 1] = ulNew - (ulNew >> slDensity);
// --------------------------
// Plasma going up
// --------------------------
} else if (eType==ptUp || eType==ptUpTile) {
// inner rectangle (without 1 pixel border)
pixOffset = _pixBufferWidth;
for( pixV=1; pixV<_pixBufferHeight-1; pixV++) {
for( pixU=0; pixU<_pixBufferWidth; pixU++) {
ulNew = ((((ULONG)pOld[pixOffset - _pixBufferWidth] +
(ULONG)pOld[pixOffset + _pixBufferWidth] +
(ULONG)pOld[pixOffset - 1] +
(ULONG)pOld[pixOffset + 1]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[pixOffset-_pixBufferWidth] = ulNew - (ulNew >> slDensity);
pixOffset++;
}
}
// tile
if (eType==ptUpTile) {
// upper horizontal border (without corners)
slLineAbove = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
slLineBelow = _pixBufferWidth + 1;
slLineLeft = 0;
slLineRight = 2;
pixOffset = 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
ulNew = ((((ULONG)pOld[slLineAbove] +
(ULONG)pOld[slLineBelow] +
(ULONG)pOld[slLineLeft] +
(ULONG)pOld[slLineRight]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[slLineAbove] = ulNew - (ulNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// lower horizontal border (without corners)
slLineAbove = ((_pixBufferHeight-2)*_pixBufferWidth) + 1;
slLineBelow = 1;
slLineLeft = (_pixBufferHeight-1)*_pixBufferWidth;
slLineRight = ((_pixBufferHeight-1)*_pixBufferWidth) + 2;
pixOffset = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
ulNew = ((((ULONG)pOld[slLineAbove] +
(ULONG)pOld[slLineBelow] +
(ULONG)pOld[slLineLeft] +
(ULONG)pOld[slLineRight]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[slLineAbove] = ulNew - (ulNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// corner ( 0, 0)
ulNew = ((((ULONG)pOld[_pixBufferWidth] +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] +
(ULONG)pOld[1] +
(ULONG)pOld[_pixBufferWidth-1]
)>>2) +
(ULONG)pOld[0]
)>>1;
pNew[(_pixBufferHeight-1)*_pixBufferWidth] = ulNew - (ulNew >> slDensity);
// corner ( 0, _pixBufferWidth)
ulNew = ((((ULONG)pOld[(2*_pixBufferWidth) - 1] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] +
(ULONG)pOld[0] +
(ULONG)pOld[_pixBufferWidth-2]
)>>2) +
(ULONG)pOld[_pixBufferWidth-1]
)>>1;
pNew[(_pixBufferHeight*_pixBufferWidth) - 1] = ulNew - (ulNew >> slDensity);
// corner ( _pixBufferHeight, 0)
ulNew = ((((ULONG)pOld[0] +
(ULONG)pOld[(_pixBufferHeight-2)*_pixBufferWidth] +
(ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) + 1] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
)>>2) +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth]
)>>1;
pNew[(_pixBufferHeight-2)*_pixBufferWidth] = ulNew - (ulNew >> slDensity);
// corner ( _pixBufferHeight, _pixBufferWidth)
ulNew = ((((ULONG)pOld[_pixBufferWidth-1] +
(ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) - 1] +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 2]
)>>2) +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
)>>1;
pNew[((_pixBufferHeight-1)*_pixBufferWidth) - 1] = ulNew - (ulNew >> slDensity);
}
// --------------------------
// Plasma going down
// --------------------------
} else if (eType==ptDown || eType==ptDownTile) {
// inner rectangle (without 1 pixel border)
pixOffset = _pixBufferWidth;
for( pixV=1; pixV<_pixBufferHeight-1; pixV++) {
for( pixU=0; pixU<_pixBufferWidth; pixU++) {
ulNew = ((((ULONG)pOld[pixOffset - _pixBufferWidth] +
(ULONG)pOld[pixOffset + _pixBufferWidth] +
(ULONG)pOld[pixOffset - 1] +
(ULONG)pOld[pixOffset + 1]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[pixOffset+_pixBufferWidth] = ulNew - (ulNew >> slDensity);
pixOffset++;
}
}
// tile
if (eType==ptDownTile) {
// upper horizontal border (without corners)
slLineAbove = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
slLineBelow = _pixBufferWidth + 1;
slLineLeft = 0;
slLineRight = 2;
pixOffset = 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
ulNew = ((((ULONG)pOld[slLineAbove] +
(ULONG)pOld[slLineBelow] +
(ULONG)pOld[slLineLeft] +
(ULONG)pOld[slLineRight]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[slLineBelow] = ulNew - (ulNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// lower horizontal border (without corners)
slLineAbove = ((_pixBufferHeight-2)*_pixBufferWidth) + 1;
slLineBelow = 1;
slLineLeft = (_pixBufferHeight-1)*_pixBufferWidth;
slLineRight = ((_pixBufferHeight-1)*_pixBufferWidth) + 2;
pixOffset = ((_pixBufferHeight-1)*_pixBufferWidth) + 1;
for( pixU=_pixBufferWidth-2; pixU>0; pixU--) {
ulNew = ((((ULONG)pOld[slLineAbove] +
(ULONG)pOld[slLineBelow] +
(ULONG)pOld[slLineLeft] +
(ULONG)pOld[slLineRight]
)>>2) +
(ULONG)pOld[pixOffset]
)>>1;
pNew[slLineBelow] = ulNew - (ulNew >> slDensity);
slLineAbove++;
slLineBelow++;
slLineLeft++;
slLineRight++;
pixOffset++;
}
// corner ( 0, 0)
ulNew = ((((ULONG)pOld[_pixBufferWidth] +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] +
(ULONG)pOld[1] +
(ULONG)pOld[_pixBufferWidth-1]
)>>2) +
(ULONG)pOld[0]
)>>1;
pNew[_pixBufferWidth] = ulNew - (ulNew >> slDensity);
// corner ( 0, _pixBufferWidth)
ulNew = ((((ULONG)pOld[(2*_pixBufferWidth) - 1] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] +
(ULONG)pOld[0] +
(ULONG)pOld[_pixBufferWidth-2]
)>>2) +
(ULONG)pOld[_pixBufferWidth-1]
)>>1;
pNew[(2*_pixBufferWidth) - 1] = ulNew - (ulNew >> slDensity);
// corner ( _pixBufferHeight, 0)
ulNew = ((((ULONG)pOld[0] +
(ULONG)pOld[(_pixBufferHeight-2)*_pixBufferWidth] +
(ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) + 1] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
)>>2) +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth]
)>>1;
pNew[0] = ulNew - (ulNew >> slDensity);
// corner ( _pixBufferHeight, _pixBufferWidth)
ulNew = ((((ULONG)pOld[_pixBufferWidth-1] +
(ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) - 1] +
(ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 2]
)>>2) +
(ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1]
)>>1;
pNew[_pixBufferWidth-1] = ulNew - (ulNew >> slDensity);
}
}
// swap buffers
Swap( _ptdEffect->td_pubBuffer1, _ptdEffect->td_pubBuffer2);
_sfStats.StopTimer(CStatForm::STI_EFFECTRENDER);
}
/*******************************
Fire Animation
********************************/
// Advances the fire simulation by one step, working in place on the effect's second buffer.
// The buffer is walked column by column; in every column (_pixBufferHeight-2) positions are
// visited, each one row (_pixBufferWidth bytes) after the previous. At each position the two
// pixels in the next two rows (offset+width and offset+2*width) are averaged:
//  - if the average is greater than slDensity, a random amount (random & slDensity) is
//    subtracted and the result is stored at (offset + asbMod3Sub1Table[random amount]),
//    ANDed with slBufferMask; the table holds (i%3)-1, i.e. -1, 0 or +1;
//  - otherwise the pixel at the current offset is set to 0.
// slDensity - threshold for the average, also the bit mask applied to the random value.
// One of three implementations of this loop is compiled, selected by ASMOPT and the
// inline-assembly flavour macros: MSVC inline asm, GNU inline asm, or portable C.
static void AnimateFire( SLONG slDensity)
{
// _sfStats.StartTimer(CStatForm::STI_EFFECTRENDER);
/////////////////////////////////// move fire
// use only one buffer (otherwise it's not working)
UBYTE *pubNew = (UBYTE*)_ptdEffect->td_pubBuffer2;
// mask applied to the destination offset
// (NOTE(review): only wraps correctly if width*height is a power of two - confirm)
SLONG slBufferMask = _pixBufferWidth*_pixBufferHeight -1;
// after (_pixBufferHeight-2) row steps, subtracting this lands on the start of the next column
SLONG slColumnModulo = _pixBufferWidth*(_pixBufferHeight-2) -1;
#if ASMOPT == 1
#if (defined __MSVC_INLINE__)
// register use: EBX = current offset, ECX = rows left in column, EDI = random seed, ESI = buffer
__asm {
push ebx
mov edi,D [ulRNDSeed] ;// EDI = randomizer
mov esi,D [pubNew]
xor ebx,ebx
colLoopFM:
mov ecx,D [_pixBufferHeight]
sub ecx,2
rowLoopFM:
mov edx,D [_pixBufferWidth]
add edx,esi
movzx eax,B [ebx+ edx]
add edx,D [_pixBufferWidth]
movzx edx,B [ebx+ edx]
add eax,edx
shr eax,1
cmp eax,D [slDensity]
jg doCalc
mov B [esi+ebx],0
jmp pixDone
doCalc:
mov edx,edi
sar edx,16
and edx,D [slDensity]
sub eax,edx
movsx edx,B [asbMod3Sub1Table +edx]
add edx,ebx
and edx,D [slBufferMask]
mov B [esi+edx],al
imul edi,262147
pixDone:
// advance to next row
add ebx,D [_pixBufferWidth]
dec ecx
jnz rowLoopFM
// advance to next column
sub ebx,D [slColumnModulo]
cmp ebx,D [_pixBufferWidth]
jl colLoopFM
// all done
mov D [ulRNDSeed],edi
pop ebx
}
#elif (defined __GNU_INLINE__)
// The three scalar inputs are pushed and then read back from the stack:
// (%esp) = slDensity, 4(%esp) = slBufferMask, 8(%esp) = slColumnModulo.
// NOTE(review): EAX, ECX, EDX and EDI are passed as input-only operands but are overwritten
// inside the asm, and doCalc_animateFire/pixDone_animateFire are global (non-numeric) labels
// that would clash if this asm were ever emitted twice - verify against the GCC Extended Asm
// rules before touching this code.
__asm__ __volatile__ (
"pushl %%ebx \n\t" // GCC's register.
"xorl %%ebx, %%ebx \n\t"
"pushl %%edx \n\t" // slColumnModulo
"pushl %%ecx \n\t" // slBufferMask
"pushl %%eax \n\t" // slDensity
"0: \n\t" // colLoopFM
"movl (" ASMSYM(_pixBufferHeight) "), %%ecx \n\t"
"subl $2, %%ecx \n\t"
"1: \n\t" // rowLoopFM
"movl (" ASMSYM(_pixBufferWidth) "), %%edx \n\t"
"addl %%esi, %%edx \n\t"
"movzbl (%%ebx, %%edx), %%eax \n\t"
"addl (" ASMSYM(_pixBufferWidth) "), %%edx \n\t"
"movzbl (%%ebx, %%edx), %%edx \n\t"
"addl %%edx, %%eax \n\t"
"shrl $1, %%eax \n\t"
"cmpl (%%esp), %%eax \n\t"
"jg doCalc_animateFire \n\t"
"movb $0, (%%esi, %%ebx) \n\t"
"jmp pixDone_animateFire \n\t"
"doCalc_animateFire: \n\t"
"movl %%edi, %%edx \n\t"
"sarl $16, %%edx \n\t"
"andl (%%esp), %%edx \n\t"
"subl %%edx, %%eax \n\t"
"movsbl " ASMSYM(asbMod3Sub1Table) "(%%edx), %%edx \n\t"
"addl %%ebx, %%edx \n\t"
"andl 4(%%esp), %%edx \n\t" // slBufferMask
"movb %%al, (%%esi, %%edx) \n\t"
"imull $262147, %%edi \n\t"
"pixDone_animateFire: \n\t"
// advance to next row
"addl (" ASMSYM(_pixBufferWidth) "), %%ebx \n\t"
"decl %%ecx \n\t"
"jnz 1b \n\t" // rowLoopFM
// advance to next column
"subl 8(%%esp), %%ebx \n\t" // slColumnModulo
"cmpl (" ASMSYM(_pixBufferWidth) "), %%ebx \n\t"
"jl 0b \n\t" // colLoopFM
// all done
"movl %%edi, (" ASMSYM(ulRNDSeed) ") \n\t"
"addl $12, %%esp \n\t" // lose our locals.
"popl %%ebx \n\t" // Restore GCC's var.
: // no outputs.
: "a" (slDensity), "c" (slBufferMask),
"d" (slColumnModulo), "D" (ulRNDSeed), "S" (pubNew)
: "cc", "memory"
);
#else
#error fill in for you platform.
#endif
#else
// portable C version of the same column-by-column walk
// inner rectangle (without 1 pixel border)
for( PIX pixU=0; pixU<_pixBufferWidth; pixU++)
{
// start at the first row of this column
SLONG slOffset = pixU;
for( PIX pixV=1; pixV<_pixBufferHeight-1; pixV++)
{
// average of the pixels one and two rows further on
ULONG ulNew = ((ULONG)pubNew[_pixBufferWidth+slOffset] + (ULONG)pubNew[_pixBufferWidth*2+slOffset]) >>1;
// note: unsigned/signed comparison here, while the asm versions use a signed compare (jg)
if( ulNew>slDensity) {
// RNDW is defined elsewhere - presumably yields the next random word and advances ulRNDSeed
ULONG ulNewDensity = RNDW&slDensity;
ulNew -= ulNewDensity;
SLONG slDifusion = (SLONG)asbMod3Sub1Table[ulNewDensity]; // (SLONG)(ulNewDensity%3-1);
SLONG slPos = (slDifusion+slOffset) & slBufferMask;
pubNew[slPos] = ulNew;
} else {
pubNew[slOffset] = 0;
}
slOffset += _pixBufferWidth;
}
}
#endif
// _sfStats.StopTimer(CStatForm::STI_EFFECTRENDER);
}
//////////////////////////// displace texture
// Heat map pointer handed to the GNU inline asm in RenderPlasmaFire() through memory,
// because no general purpose register is left to pass it as an operand.
UBYTE *_pubHeat_RenderPlasmaFire = NULL;
// Converts the plasma/fire heat buffer (td_pubBuffer2 of the effect texture) into texels of
// the effect texture: every heat byte, shifted right by slBaseMipShift, is used as an index
// into the base texture's frame data and the texel found there is copied to the output.
// Output size is _pixTexWidth x _pixTexHeight; the heat map is sampled with a horizontal
// step of slHeatMapStep bytes, and slHeatRowStep further bytes are skipped after each row.
// One of three implementations is compiled: MSVC inline asm, GNU inline asm, or portable C.
static void RenderPlasmaFire(void)
{
// _sfStats.StartTimer(CStatForm::STI_EFFECTRENDER);
// get and adjust textures' parameters
PIX pixBaseWidth = _ptdBase->GetPixWidth();
ULONG *pulTextureBase = _ptdBase->td_pulFrames;
ULONG *pulTexture = _ptdEffect->td_pulFrames;
ASSERT( _ptdEffect->td_pulFrames!=NULL && _ptdBase->td_pulFrames!=NULL && pixBaseWidth<=256);
UBYTE *pubHeat = (UBYTE*)_ptdEffect->td_pubBuffer2; // heat map pointer
// heat-map bytes to advance per output pixel
// (NOTE(review): integer division - assumes _pixTexWidth is non-zero; confirm with caller)
SLONG slHeatMapStep = _pixBufferWidth/_pixTexWidth;
// extra bytes to skip at the end of each output row: (slHeatMapStep-1) whole buffer rows
SLONG slHeatRowStep = (slHeatMapStep-1)*_pixBufferWidth;
// right shift applied to each heat byte before the lookup
// (presumably scales 0..255 down to 0..pixBaseWidth-1; depends on FastLog2 - confirm)
SLONG slBaseMipShift = 8 - FastLog2(pixBaseWidth);
#if ASMOPT == 1
#if (defined __MSVC_INLINE__)
// register use: EBX = heat pointer, ESI = base texels, EDI = output texels,
// ECX = shift count inside the pixel loop (row counter is kept on the stack), EDX = pixels left
__asm {
push ebx
mov ebx,D [pubHeat]
mov esi,D [pulTextureBase]
mov edi,D [pulTexture]
mov ecx,D [_pixTexHeight]
rowLoopF:
push ecx
mov edx,D [_pixTexWidth]
mov ecx,D [slBaseMipShift]
pixLoopF:
movzx eax,B [ebx]
shr eax,cl
mov eax,D [esi+ eax*4]
mov D [edi],eax
// advance to next pixel
add ebx,D [slHeatMapStep]
add edi,4
dec edx
jnz pixLoopF
// advance to next row
pop ecx
add ebx,D [slHeatRowStep]
dec ecx
jnz rowLoopF
pop ebx
}
#elif (defined __GNU_INLINE__)
_pubHeat_RenderPlasmaFire = pubHeat; // ran out of registers. :/
// Stack layout after the three pushes: (%esp) = slBaseMipShift, 4(%esp) = slHeatMapStep,
// 8(%esp) = slHeatRowStep. Inside the row loop the row counter is pushed as well, so every
// slot moves up by 4 until the matching popl; that is why both the per-pixel step
// (slHeatMapStep) and the per-row step (slHeatRowStep) are addressed as 8(%esp).
// NOTE(review): EAX, ECX, EDX and EDI are input-only operands that the asm overwrites -
// verify against the GCC Extended Asm rules before touching this code.
__asm__ __volatile__ (
"pushl %%ebx \n\t"
"movl (" ASMSYM(_pubHeat_RenderPlasmaFire) "),%%ebx \n\t"
"pushl %%eax \n\t" // slHeatRowStep
"pushl %%edx \n\t" // slHeatMapStep
"pushl %%ecx \n\t" // slBaseMipShift
"movl (" ASMSYM(_pixTexHeight) "), %%ecx \n\t"
"0: \n\t" // rowLoopF
"pushl %%ecx \n\t"
"movl (" ASMSYM(_pixTexWidth) "), %%edx \n\t"
"movl 4(%%esp), %%ecx \n\t" // slBaseMipShift
"1: \n\t" // pixLoopF
"movzbl (%%ebx), %%eax \n\t"
"shrl %%cl, %%eax \n\t"
"movl (%%esi, %%eax, 4), %%eax \n\t"
"movl %%eax, (%%edi) \n\t"
// advance to next pixel
"addl 8(%%esp), %%ebx \n\t" // slHeatMapStep
"addl $4, %%edi \n\t"
"decl %%edx \n\t"
"jnz 1b \n\t" // pixLoopF
// advance to next row
"popl %%ecx \n\t"
"addl 8(%%esp), %%ebx \n\t" // slHeatRowStep
"decl %%ecx \n\t"
"jnz 0b \n\t" // rowLoopF
"addl $12, %%esp \n\t" // lose our locals.
"popl %%ebx \n\t" // restore GCC's register.
: // no outputs.
: "S" (pulTextureBase), "D" (pulTexture),
"c" (slBaseMipShift), "a" (slHeatRowStep), "d" (slHeatMapStep)
: "cc", "memory"
);
#else
#error fill in for you platform.
#endif
#else
// portable C version of the same lookup loop
INDEX iPalette;
for( INDEX pixV=0; pixV<_pixTexHeight; pixV++) {
// for every pixel in horizontal line
for( INDEX pixU=0; pixU<_pixTexWidth; pixU++) {
// heat value -> index into the base texture's texels
iPalette = (*pubHeat)>>slBaseMipShift;
*pulTexture++ = pulTextureBase[iPalette];
pubHeat += slHeatMapStep;
}
// skip the heat-map rows that have no output row
pubHeat += slHeatRowStep;
}
#endif
// _sfStats.StopTimer(CStatForm::STI_EFFECTRENDER);
}
/////////////////////////////////////////////////////////////////////
// EFFECT TABLES
/////////////////////////////////////////////////////////////////////
// Effect sources available to the water presets.
// Each row: source name, its Initialize* routine, its Animate* routine.
struct TextureEffectSourceType atestWater[] = {
  { "Raindrops",       InitializeRaindropsStandard, AnimateRaindropsStandard },
  { "RaindropsBig",    InitializeRaindropsBig,      AnimateRaindropsBig      },
  { "RaindropsSmall",  InitializeRaindropsSmall,    AnimateRaindropsSmall    },
  { "Random Surfer",   InitializeRandomSurfer,      AnimateRandomSurfer      },
  { "Oscilator",       InitializeOscilator,         AnimateOscilator         },
  { "Vertical Line",   InitializeVertLine,          AnimateVertLine          },
  { "Horizontal Line", InitializeHortLine,          AnimateHortLine          },
};
// Effect sources available to the plasma and fire presets.
// Each row: source name, its Initialize* routine, its Animate* routine.
struct TextureEffectSourceType atestFire[] = {
  { "Point",          InitializeFirePoint,         AnimateFirePoint         },
  { "Random Point",   InitializeRandomFirePoint,   AnimateRandomFirePoint   },
  { "Shake Point",    InitializeFireShakePoint,    AnimateFireShakePoint    },
  { "Fire Place",     InitializeFirePlace,         AnimateFirePlace         },
  { "Roler",          InitializeFireRoler,         AnimateFireRoler         },
  { "Fall",           InitializeFireFall,          AnimateFireFall          },
  { "Fountain",       InitializeFireFountain,      AnimateFireFountain      },
  { "Side Fountain",  InitializeFireSideFountain,  AnimateFireSideFountain  },
  { "Lightning",      InitializeFireLightning,     AnimateFireLightning     },
  { "Lightning Ball", InitializeFireLightningBall, AnimateFireLightningBall },
  { "Smoke",          InitializeFireSmoke,         AnimateFireSmoke         },
};
// Parameterless adapters that bind fixed arguments to the animation routines,
// so they can be referenced from the global preset table.

// water: argument passed to AnimateWater()
inline void AWaterFast(void)      { AnimateWater(2); }
inline void AWaterMedium(void)    { AnimateWater(3); }
inline void AWaterSlow(void)      { AnimateWater(5); }

// plasma: (density shift, plasma type) passed to AnimatePlasma()
inline void APlasma(void)         { AnimatePlasma(4, ptNormal); }
inline void APlasmaUp(void)       { AnimatePlasma(4, ptUp); }
inline void APlasmaUpTile(void)   { AnimatePlasma(4, ptUpTile); }
inline void APlasmaDown(void)     { AnimatePlasma(5, ptDown); }
inline void APlasmaDownTile(void) { AnimatePlasma(5, ptDownTile); }
inline void APlasmaUpSlow(void)   { AnimatePlasma(6, ptUp); }

// fire: density passed to AnimateFire()
inline void AFire(void)           { AnimateFire(15); }
// Global effect presets, indexed by the effect type stored in CTextureEffectGlobal.
// Each row: preset name, global initializer, global animation routine,
// number of effect sources in the source table, the source table itself.
// Presets that use InitializeWater are the ones IsWater() reports as water.
struct TextureEffectGlobalType _ategtTextureEffectGlobalPresets[] = {
  // water presets
  { "Water Fast",       InitializeWater, AWaterFast,      sizeof(atestWater)/sizeof(atestWater[0]), atestWater },
  { "Water Medium",     InitializeWater, AWaterMedium,    sizeof(atestWater)/sizeof(atestWater[0]), atestWater },
  { "Water Slow",       InitializeWater, AWaterSlow,      sizeof(atestWater)/sizeof(atestWater[0]), atestWater },
  { "",                 InitializeWater, AWaterSlow,      sizeof(atestWater)/sizeof(atestWater[0]), atestWater },
  // plasma presets
  { "Plasma Tile",      InitializeFire,  APlasma,         sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
  { "Plasma Up",        InitializeFire,  APlasmaUp,       sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
  { "Plasma Up Tile",   InitializeFire,  APlasmaUpTile,   sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
  { "Plasma Down",      InitializeFire,  APlasmaDown,     sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
  { "Plasma Down Tile", InitializeFire,  APlasmaDownTile, sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
  { "Plasma Up Slow",   InitializeFire,  APlasmaUpSlow,   sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
  // fire preset
  { "Fire",             InitializeFire,  AFire,           sizeof(atestFire)/sizeof(atestFire[0]),   atestFire  },
};

// number of entries in the preset table above
INDEX _ctTextureEffectGlobalPresets =
  sizeof(_ategtTextureEffectGlobalPresets) / sizeof(*_ategtTextureEffectGlobalPresets);
// Tells the kind of this global effect: TRUE for a water effect, FALSE for plasma or fire.
// A preset counts as water when its initializer is InitializeWater.
BOOL CTextureEffectGlobal::IsWater(void)
{
  const struct TextureEffectGlobalType &tegtPreset = _ategtTextureEffectGlobalPresets[teg_ulEffectType];
  return( tegtPreset.tegt_Initialize == InitializeWater);
}
// Constructor: binds the global effect to its texture and selects the preset.
//   ptdTexture     - texture data this effect belongs to (kept for cross linking)
//   ulGlobalEffect - index into _ategtTextureEffectGlobalPresets (not range-checked here)
CTextureEffectGlobal::CTextureEffectGlobal(CTextureData *ptdTexture, ULONG ulGlobalEffect)
{
  // store the owning texture and the chosen preset
  teg_ptdTexture   = ptdTexture;
  teg_ulEffectType = ulGlobalEffect;

  // run the preset's global initializer
  const struct TextureEffectGlobalType &tegtPreset = _ategtTextureEffectGlobalPresets[teg_ulEffectType];
  tegtPreset.tegt_Initialize();

  // force a texture update on next use
  teg_updTexture.Invalidate();
}
// Appends one effect source to this global effect and initializes it
// with the given source type and the two points (pixU0,pixV0) and (pixU1,pixV1).
void CTextureEffectGlobal::AddEffectSource( ULONG ulEffectSourceType, PIX pixU0, PIX pixV0,
                                            PIX pixU1, PIX pixV1)
{
  // allocate a single new source in the dynamic array and set it up right away
  teg_atesEffectSources.New(1)->Initialize( this, ulEffectSourceType, pixU0, pixV0, pixU1, pixV1);
}
// animate effect texture
void CTextureEffectGlobal::Animate(void)
{
// if not set yet (funny word construction:)
if( !bTableSet) {
// set table for fast modulo 3 minus 1
for( INDEX i=0; i<256; i++) asbMod3Sub1Table[i]=(SBYTE)((i%3)-1);
bTableSet = TRUE;
}
// setup some internal vars
_ptdEffect = teg_ptdTexture;
_pixBufferWidth = _ptdEffect->td_pixBufferWidth;
_pixBufferHeight = _ptdEffect->td_pixBufferHeight;
_ulBufferMask = _pixBufferHeight*_pixBufferWidth -1;
// remember buffer pointers
_pubDrawBuffer=(UBYTE*)_ptdEffect->td_pubBuffer2;
_pswDrawBuffer=(SWORD*)_ptdEffect->td_pubBuffer2;
// for each effect source
FOREACHINDYNAMICARRAY( teg_atesEffectSources, CTextureEffectSource, itEffectSource) {
// let it animate itself
itEffectSource->Animate();
}
// use animation function for this global effect type
_ategtTextureEffectGlobalPresets[teg_ulEffectType].tegt_Animate();
// remember that it was calculated
teg_updTexture.MarkUpdated();
}
#pragma warning(disable: 4731)
// Renders the effect texture from the current animation buffers.
//   iWantedMipLevel - mip level to render
//   pixTexWidth, pixTexHeight - output size; used by the water path only, the plasma/fire
//                               path derives its size from the effect texture and mip level
void CTextureEffectGlobal::Render( INDEX iWantedMipLevel, PIX pixTexWidth, PIX pixTexHeight)
{
  // publish textures and buffer dimensions for the rendering routines
  _ptdEffect       = teg_ptdTexture;
  _ptdBase         = teg_ptdTexture->td_ptdBaseTexture;
  _pixBufferWidth  = _ptdEffect->td_pixBufferWidth;
  _pixBufferHeight = _ptdEffect->td_pixBufferHeight;

  // plasma & fire share one rendering routine
  if( !IsWater()) {
    _pixTexWidth  = _ptdEffect->GetWidth()  >>iWantedMipLevel;
    _pixTexHeight = _ptdEffect->GetHeight() >>iWantedMipLevel;
    RenderPlasmaFire();
    return;
  }

  // water has its own rendering routine
  _pixTexWidth     = pixTexWidth;
  _pixTexHeight    = pixTexHeight;
  _iWantedMipLevel = iWantedMipLevel;
  RenderWater();
}
#pragma warning(default: 4731)
// Returns the number of seconds accumulated in the STI_EFFECTRENDER statistics timer,
// i.e. the time spent rendering effect textures.
DOUBLE CTextureEffectGlobal::GetRenderingTime(void)
{
  const DOUBLE dSeconds = _sfStats.sf_astTimers[CStatForm::STI_EFFECTRENDER].st_tvElapsed.GetSeconds();
  return( dSeconds);
}