/* Copyright (c) 2002-2012 Croteam Ltd. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as published by the Free Software Foundation This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include "Engine/StdH.h" #include <Engine/Graphics/Texture.h> #include <Engine/Graphics/TextureEffects.h> #include <Engine/Math/Functions.h> #include <Engine/Base/Timer.h> #include <Engine/Base/Statistics_Internal.h> #include <Engine/Templates/DynamicArray.cpp> #include <Engine/Templates/Stock_CTextureData.h> #include <Engine/Templates/StaticArray.cpp> // asm shortcuts #define O offset #define Q qword ptr #define D dword ptr #define W word ptr #define B byte ptr #if (defined __MSVC_INLINE__) #define ASMOPT 1 #elif (defined __GNU_INLINE_X86_32__) #define ASMOPT 1 #else #define ASMOPT 0 #endif __int64 mmBaseWidthShift=0; __int64 mmBaseWidth=0; __int64 mmBaseWidthMask=0; __int64 mmBaseHeightMask=0; __int64 mmBaseMasks=0; __int64 mmShift=0; #if (defined __GNUC__) /* * If these are "const" vars, they get optimized to hardcoded values when gcc * builds with optimization, which means the linker can't resolve the * references to them in the inline ASM. That's obnoxious. */ __int64 mm1LO = 0x0000000000000001ll; __int64 mm1HI = 0x0000000100000000ll; __int64 mm1HILO = 0x0000000100000001ll; __int64 mm0001 = 0x0000000000000001ll; __int64 mm0010 = 0x0000000000010000ll; __int64 mm00M0 = 0x00000000FFFF0000ll; static void *force_syms_to_exist = NULL; void asm_force_mm1LO() { force_syms_to_exist = &mm1LO; } void asm_force_mm1HI() { force_syms_to_exist = &mm1HI; } void asm_force_mm1HILO() { force_syms_to_exist = &mm1HILO; } void asm_force_mm0001() { force_syms_to_exist = &mm0001; } void asm_force_mm0010() { force_syms_to_exist = &mm0010; } void asm_force_mm00M0() { force_syms_to_exist = &mm00M0; } void asm_force_mmBaseWidthShift() { force_syms_to_exist = &mmBaseWidthShift; } void asm_force_mmBaseWidth() { force_syms_to_exist = &mmBaseWidth; } void asm_force_mmBaseWidthMask() { force_syms_to_exist = &mmBaseWidthMask; } void asm_force_mmBaseHeightMask() { force_syms_to_exist = &mmBaseHeightMask; } void asm_force_mmBaseMasks() { force_syms_to_exist = &mmBaseMasks; } void asm_force_mmShift() { force_syms_to_exist = &mmShift; } #else const __int64 mm1LO = 0x0000000000000001; const __int64 mm1HI = 0x0000000100000000; const __int64 mm1HILO = 0x0000000100000001; const __int64 mm0001 = 0x0000000000000001; const __int64 mm0010 = 0x0000000000010000; const __int64 mm00M0 = 0x00000000FFFF0000; #endif // speed table SBYTE asbMod3Sub1Table[256]; static BOOL bTableSet = FALSE; static CTextureData *_ptdEffect, *_ptdBase; static ULONG _ulBufferMask; static INDEX _iWantedMipLevel; static UBYTE *_pubDrawBuffer; static SWORD *_pswDrawBuffer; PIX _pixTexWidth, _pixTexHeight; PIX _pixBufferWidth, _pixBufferHeight; // randomizer ULONG ulRNDSeed; inline void Randomize( ULONG ulSeed) { if( ulSeed==0) ulSeed = 0x87654321; ulRNDSeed = ulSeed*262147; }; inline ULONG Rnd(void) { ulRNDSeed = ulRNDSeed*262147; return ulRNDSeed; }; #define RNDW (Rnd()>>16) // Initialize the texture effect source. void CTextureEffectSource::Initialize( class CTextureEffectGlobal *ptegGlobalEffect, ULONG ulEffectSourceType, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { // remember global effect for cross linking tes_ptegGlobalEffect = ptegGlobalEffect; tes_ulEffectSourceType = ulEffectSourceType; // obtain effect source table for current effect class struct TextureEffectSourceType *patestSourceEffectTypes = _ategtTextureEffectGlobalPresets[ ptegGlobalEffect->teg_ulEffectType].tet_atestEffectSourceTypes; // init for animating patestSourceEffectTypes[ulEffectSourceType].test_Initialize(this, pixU0, pixV0, pixU1, pixV1); } // Animate the texture effect source. void CTextureEffectSource::Animate(void) { // obtain effect source table for current effect class struct TextureEffectSourceType *patestSourceEffectTypes = _ategtTextureEffectGlobalPresets[ tes_ptegGlobalEffect->teg_ulEffectType] .tet_atestEffectSourceTypes; // animating it patestSourceEffectTypes[tes_ulEffectSourceType].test_Animate(this); } // ---------------------------------------- // SLONG WATER // ---------------------------------------- inline void PutPixelSLONG_WATER( PIX pixU, PIX pixV, INDEX iHeight) { _pswDrawBuffer[(pixV*_pixBufferWidth+pixU)&_ulBufferMask] += iHeight; } inline void PutPixel9SLONG_WATER( PIX pixU, PIX pixV, INDEX iHeightMid) { INDEX iHeightSide = (iHeightMid*28053) >>16; // iHeight /0.851120 *0.364326; INDEX iHeightDiag = (iHeightMid*12008) >>16; // iHeight /0.851120 *0.155951; PutPixelSLONG_WATER( pixU-1, pixV-1, iHeightDiag); PutPixelSLONG_WATER( pixU, pixV-1, iHeightSide); PutPixelSLONG_WATER( pixU+1, pixV-1, iHeightDiag); PutPixelSLONG_WATER( pixU-1, pixV, iHeightSide); PutPixelSLONG_WATER( pixU, pixV, iHeightMid); PutPixelSLONG_WATER( pixU+1, pixV, iHeightSide); PutPixelSLONG_WATER( pixU-1, pixV+1, iHeightDiag); PutPixelSLONG_WATER( pixU, pixV+1, iHeightSide); PutPixelSLONG_WATER( pixU+1, pixV+1, iHeightDiag); } // ---------------------------------------- // UBYTE FIRE // ---------------------------------------- inline void PutPixelUBYTE_FIRE( PIX pixU, PIX pixV, INDEX iHeight) { PIX pixLoc = (pixV*_pixBufferWidth+pixU) & _ulBufferMask; _pubDrawBuffer[pixLoc] = Clamp( _pubDrawBuffer[pixLoc] +iHeight, 0, 255); } inline void PutPixel9UBYTE_FIRE( PIX pixU, PIX pixV, INDEX iHeightMid) { INDEX iHeightSide = (iHeightMid*28053) >>16; // iHeight /0.851120 *0.364326; INDEX iHeightDiag = (iHeightMid*12008) >>16; // iHeight /0.851120 *0.155951; PutPixelUBYTE_FIRE( pixU-1, pixV-1, iHeightDiag); PutPixelUBYTE_FIRE( pixU, pixV-1, iHeightSide); PutPixelUBYTE_FIRE( pixU+1, pixV-1, iHeightDiag); PutPixelUBYTE_FIRE( pixU-1, pixV, iHeightSide); PutPixelUBYTE_FIRE( pixU, pixV, iHeightMid); PutPixelUBYTE_FIRE( pixU+1, pixV, iHeightSide); PutPixelUBYTE_FIRE( pixU-1, pixV+1, iHeightDiag); PutPixelUBYTE_FIRE( pixU, pixV+1, iHeightSide); PutPixelUBYTE_FIRE( pixU+1, pixV+1, iHeightDiag); } inline void PutPixel25UBYTE_FIRE( PIX pixU, PIX pixV, INDEX iHeightMid) { INDEX iHeightSide = (iHeightMid*28053) >>16; // iHeight /0.851120 *0.364326; INDEX iHeightDiag = (iHeightMid*12008) >>16; // iHeight /0.851120 *0.155951; PutPixelUBYTE_FIRE( pixU-2, pixV-2, iHeightDiag); PutPixelUBYTE_FIRE( pixU-1, pixV-2, iHeightSide); PutPixelUBYTE_FIRE( pixU, pixV-2, iHeightSide); PutPixelUBYTE_FIRE( pixU+1, pixV-2, iHeightSide); PutPixelUBYTE_FIRE( pixU+2, pixV-2, iHeightDiag); PutPixelUBYTE_FIRE( pixU-2, pixV-1, iHeightSide); PutPixelUBYTE_FIRE( pixU-1, pixV-1, iHeightSide); PutPixelUBYTE_FIRE( pixU, pixV-1, iHeightMid); PutPixelUBYTE_FIRE( pixU+1, pixV-1, iHeightSide); PutPixelUBYTE_FIRE( pixU+2, pixV-1, iHeightSide); PutPixelUBYTE_FIRE( pixU-2, pixV, iHeightSide); PutPixelUBYTE_FIRE( pixU-1, pixV, iHeightMid); PutPixelUBYTE_FIRE( pixU, pixV, iHeightMid); PutPixelUBYTE_FIRE( pixU+1, pixV, iHeightMid); PutPixelUBYTE_FIRE( pixU+2, pixV, iHeightSide); PutPixelUBYTE_FIRE( pixU-2, pixV+1, iHeightSide); PutPixelUBYTE_FIRE( pixU-1, pixV+1, iHeightSide); PutPixelUBYTE_FIRE( pixU, pixV+1, iHeightMid); PutPixelUBYTE_FIRE( pixU+1, pixV+1, iHeightSide); PutPixelUBYTE_FIRE( pixU+2, pixV+1, iHeightSide); PutPixelUBYTE_FIRE( pixU+2, pixV+2, iHeightDiag); PutPixelUBYTE_FIRE( pixU-1, pixV+2, iHeightSide); PutPixelUBYTE_FIRE( pixU, pixV+2, iHeightSide); PutPixelUBYTE_FIRE( pixU+1, pixV+2, iHeightSide); PutPixelUBYTE_FIRE( pixU-2, pixV+2, iHeightDiag); } ///////////////////////////////////////////////////////////////////// // WATER EFFECTS ///////////////////////////////////////////////////////////////////// // WARNING: Changing this value will BREAK the inline asm on // GNU-based platforms (Linux, etc.) YOU HAVE BEEN WARNED. #define DISTORTION 3 //3 ///////////////// random surfer struct Surfer { FLOAT fU; FLOAT fV; FLOAT fAngle; }; void InitializeRandomSurfer(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { Surfer &sf = (*((Surfer *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); sf.fU = pixU0; sf.fV = pixV0; sf.fAngle = RNDW&7; } void AnimateRandomSurfer(CTextureEffectSource *ptes) { Surfer &sf = (*((Surfer *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); PutPixel9SLONG_WATER((long) sf.fU, (long) sf.fV, 125); sf.fU += 2*sin(sf.fAngle); sf.fV += 2*cos(sf.fAngle); PutPixel9SLONG_WATER((long) sf.fU, (long) sf.fV, 250); if((RNDW&15)==0) { sf.fAngle += 3.14f/7.0f; } if((RNDW&15)==0) { sf.fAngle -= 3.14f/5.0f; } } ///////////////// raindrops struct Raindrop { UBYTE pixU; UBYTE pixV; SWORD iHeight; SWORD iIndex; }; void InitializeRaindrops(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1, int iHeight) { for (int iIndex=0; iIndex<5; iIndex++) { Raindrop &rd = ((Raindrop&) ptes->tes_tespEffectSourceProperties.tesp_achDummy[iIndex*sizeof(Raindrop)]); rd.pixU = RNDW&(_pixBufferWidth -1); rd.pixV = RNDW&(_pixBufferHeight-1); rd.iHeight = RNDW&iHeight; rd.iIndex = iIndex*8; } } void InitializeRaindropsStandard(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { InitializeRaindrops(ptes, pixU0, pixV0, pixU1, pixV1, 255); } void InitializeRaindropsBig(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { InitializeRaindrops(ptes, pixU0, pixV0, pixU1, pixV1, 1023); } void InitializeRaindropsSmall(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { InitializeRaindrops(ptes, pixU0, pixV0, pixU1, pixV1, 31); } void AnimateRaindrops(CTextureEffectSource *ptes, int iHeight) { for (int iIndex=0; iIndex<5; iIndex++) { Raindrop &rd = ((Raindrop&) ptes->tes_tespEffectSourceProperties.tesp_achDummy[iIndex*sizeof(Raindrop)]); if (rd.iIndex < 48) { rd.iIndex++; if (rd.iIndex < 8) { PutPixel9SLONG_WATER(rd.pixU, rd.pixV, (long) sin(rd.iIndex/4.0f*(-3.14f))*rd.iHeight); } } else { rd.pixU = RNDW&(_pixBufferWidth -1); rd.pixV = RNDW&(_pixBufferHeight-1); rd.iHeight = RNDW&iHeight; rd.iIndex = 0; } } } void AnimateRaindropsStandard(CTextureEffectSource *ptes) { AnimateRaindrops(ptes, 255); } void AnimateRaindropsBig(CTextureEffectSource *ptes) { AnimateRaindrops(ptes, 1023); } void AnimateRaindropsSmall(CTextureEffectSource *ptes) { AnimateRaindrops(ptes, 31); } ///////////////// oscilator struct Oscilator { UBYTE pixU; UBYTE pixV; FLOAT fAngle; }; void InitializeOscilator(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { Oscilator &os = (*((Oscilator *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); os.pixU = pixU0; os.pixV = pixV0; os.fAngle = -3.14f; } void AnimateOscilator(CTextureEffectSource *ptes) { Oscilator &os = (*((Oscilator *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); PutPixel9SLONG_WATER(os.pixU, os.pixV, (long) sin(os.fAngle)*150); os.fAngle += (3.14f/6); } ///////////////// Vertical Line struct VertLine{ UBYTE pixU; UBYTE pixV; UWORD uwSize; FLOAT fAngle; }; void InitializeVertLine(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { VertLine &vl = (*((VertLine *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); vl.pixU = pixU0; vl.pixV = pixV0; vl.fAngle = -3.14f; if (pixV0==pixV1) { vl.uwSize = 16; } else { vl.uwSize = abs(pixV1-pixV0); } } void AnimateVertLine(CTextureEffectSource *ptes) { VertLine &vl = (*((VertLine *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); PIX pixV = vl.pixV; for (int iCnt=0; iCnt<vl.uwSize; iCnt++) { PutPixelSLONG_WATER(vl.pixU, pixV, (long) (sin(vl.fAngle)*25)); pixV = (pixV+1)&(_pixBufferHeight-1); } vl.fAngle += (3.14f/6); } ///////////////// Horizontal Line struct HortLine{ UBYTE pixU; UBYTE pixV; UWORD uwSize; FLOAT fAngle; }; void InitializeHortLine(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { HortLine &hl = (*((HortLine *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); hl.pixU = pixU0; hl.pixV = pixV0; hl.fAngle = -3.14f; if (pixU0==pixU1) { hl.uwSize = 16; } else { hl.uwSize = abs(pixU1-pixU0); } } void AnimateHortLine(CTextureEffectSource *ptes) { HortLine &hl = (*((HortLine *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); PIX pixU = hl.pixU; for (int iCnt=0; iCnt<hl.uwSize; iCnt++) { PutPixelSLONG_WATER(pixU, hl.pixV, (long) (sin(hl.fAngle)*25)); pixU = (pixU+1)&(_pixBufferWidth-1); } hl.fAngle += (3.14f/6); } ///////////////////////////////////////////////////////////////////// // FIRE EFFECTS ///////////////////////////////////////////////////////////////////// ///////////////// Fire Point struct FirePoint{ UBYTE pixU; UBYTE pixV; }; void InitializeFirePoint(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { FirePoint &ft = (*((FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); ft.pixU = pixU0; ft.pixV = pixV0; } void AnimateFirePoint(CTextureEffectSource *ptes) { FirePoint &ft = (*((FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); PutPixel9UBYTE_FIRE(ft.pixU, ft.pixV, 255); } void InitializeRandomFirePoint(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { FirePoint &ft = (*((FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); ft.pixU = pixU0; ft.pixV = pixV0; } void AnimateRandomFirePoint(CTextureEffectSource *ptes) { FirePoint &ft = (*((FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); PutPixel9UBYTE_FIRE(ft.pixU, ft.pixV, RNDW&255); } void InitializeFireShakePoint(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { FirePoint &ft = (*((FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); ft.pixU = pixU0; ft.pixV = pixV0; } void AnimateFireShakePoint(CTextureEffectSource *ptes) { FirePoint &ft = (*((FirePoint *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); UBYTE pixU, pixV; pixU = RNDW%3 - 1; pixV = RNDW%3 - 1; PutPixel9UBYTE_FIRE(ft.pixU+pixU, ft.pixV+pixV, 255); } ///////////////// Fire Place #define FIREPLACE_SIZE 60 struct FirePlace{ UBYTE pixU; UBYTE pixV; UBYTE ubWidth; UBYTE aubFire[FIREPLACE_SIZE]; }; void InitializeFirePlace(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { FirePlace &fp = (*((FirePlace *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); fp.pixU = pixU0; fp.pixV = pixV0; fp.ubWidth = abs(pixU1-pixU0); if (fp.ubWidth>FIREPLACE_SIZE) fp.ubWidth=FIREPLACE_SIZE; if (fp.ubWidth<10) fp.ubWidth = 10; // clear fire array for (int iCnt=0; iCnt<fp.ubWidth; iCnt++) { fp.aubFire[iCnt] = 0; } } void AnimateFirePlace(CTextureEffectSource *ptes) { INDEX iIndex; FirePlace &fp = (*((FirePlace *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); ULONG ulRND = RNDW&255; // match if (ulRND>200) { ULONG ulMatchIndex = ulRND%(fp.ubWidth-5); for (iIndex=0; iIndex<5; iIndex++) { fp.aubFire[ulMatchIndex+iIndex] = 255; } // water } else if (ulRND<50) { for (iIndex=0; iIndex<10; iIndex++) { fp.aubFire[RNDW%fp.ubWidth] = 0; } } // fix fire place for (iIndex=0; iIndex<fp.ubWidth; iIndex++) { UBYTE ubFlame = fp.aubFire[iIndex]; // flame is fading ? if (ubFlame < 50) { // starting to burn if (ubFlame > 10) { ubFlame += RNDW%30; //30 // give more fire } else { ubFlame += RNDW%30+30; //30,30 } } fp.aubFire[iIndex] = ubFlame; } // water on edges for (iIndex=0; iIndex<4; iIndex++) { INDEX iWater = RNDW%4; fp.aubFire[iWater] = 0; fp.aubFire[fp.ubWidth-1-iWater] = 0; } // smooth fire place for (iIndex=1; iIndex<(fp.ubWidth-1); iIndex++) { fp.aubFire[iIndex] = (fp.aubFire[iIndex-1]+fp.aubFire[iIndex]+fp.aubFire[iIndex+1])/3; } // draw fire place in buffer for (iIndex=0; iIndex<fp.ubWidth; iIndex++) { PutPixel9UBYTE_FIRE(fp.pixU+iIndex, fp.pixV, fp.aubFire[iIndex]); } } ///////////////// Fire Roler struct FireRoler{ UBYTE pixU; UBYTE pixV; //FLOAT fRadius; FLOAT fRadiusU; FLOAT fRadiusV; FLOAT fAngle; FLOAT fAngleAdd; }; void InitializeFireRoler(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { FireRoler &fr = (*((FireRoler *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); fr.pixU = pixU0; fr.pixV = pixV0; if (pixU0==pixU1 && pixV0==pixV1) { //fr.fRadius = 3; fr.fRadiusU = 3; fr.fRadiusV = 3; fr.fAngleAdd = (3.14f/6); } else { //fr.fRadius = sqrt((pixU1-pixU0)*(pixU1-pixU0) + (pixV1-pixV0)*(pixV1-pixV0)); fr.fRadiusU = pixU1-pixU0; fr.fRadiusV = pixV1-pixV0; //fr.fAngleAdd = (3.14f/((fr.fRadius)*2)); fr.fAngleAdd = (3.14f/(Abs(fr.fRadiusU)+Abs(fr.fRadiusV))); } fr.fAngle = 0; } void AnimateFireRoler(CTextureEffectSource *ptes) { FireRoler &fr = (*((FireRoler *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); PutPixel9UBYTE_FIRE((long) (cos(fr.fAngle)*fr.fRadiusU + fr.pixU), (long) (sin(fr.fAngle)*fr.fRadiusV + fr.pixV), 255); fr.fAngle += fr.fAngleAdd; PutPixel9UBYTE_FIRE((long) (cos(fr.fAngle)*fr.fRadiusU + fr.pixU), (long) (sin(fr.fAngle)*fr.fRadiusV + fr.pixV), 200); fr.fAngle += fr.fAngleAdd; PutPixel9UBYTE_FIRE((long) (cos(fr.fAngle)*fr.fRadiusU + fr.pixU), (long) (sin(fr.fAngle)*fr.fRadiusV + fr.pixV), 150); fr.fAngle += fr.fAngleAdd; } ///////////////// Fire Fall #define FIREFALL_POINTS 100 struct FireFall{ UBYTE pixU; UBYTE pixV; ULONG ulWidth; ULONG ulPointToReinitialize; }; struct FireFallPixel{ UBYTE pixU; UBYTE pixV; UBYTE ubSpeed; }; void InitializeFireFall(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { FireFall &ff = (*((FireFall *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); ff.pixU = pixU0; ff.pixV = pixV0; if (pixU0==pixU1) { ff.ulWidth = 15; } else { ff.ulWidth = abs(pixU1-pixU0); } // initialize fall points ptes->tes_atepPixels.New(FIREFALL_POINTS); ff.ulPointToReinitialize = 0; for (INDEX iIndex=0; iIndex<FIREFALL_POINTS; iIndex++) { FireFallPixel &ffp = ((FireFallPixel&) ptes->tes_atepPixels[iIndex]); ffp.pixU = ff.pixU+(RNDW%ff.ulWidth); ffp.pixV = ff.pixV+(RNDW%_pixBufferHeight); ffp.ubSpeed = (RNDW&1)+2; } } void AnimateFireFall(CTextureEffectSource *ptes) { FireFall &ff = (*((FireFall *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); // animate fall points for (INDEX iIndex=0; iIndex<FIREFALL_POINTS; iIndex++) { FireFallPixel &ffp = ((FireFallPixel&) ptes->tes_atepPixels[iIndex]); // fall from fall int iHeight = (RNDW&3)*64 + 40; if (ffp.ubSpeed == 2) { PutPixelUBYTE_FIRE(ffp.pixU+(RNDW%3)-1, ffp.pixV, iHeight); PutPixelUBYTE_FIRE(ffp.pixU+(RNDW%3)-1, ffp.pixV+1, iHeight-40); } else { PutPixelUBYTE_FIRE(ffp.pixU, ffp.pixV, iHeight); PutPixelUBYTE_FIRE(ffp.pixU, ffp.pixV+1, iHeight-40); } ffp.pixV+=ffp.ubSpeed; // when falled down reinitialize if (ffp.pixV >= _pixBufferHeight) { if (ff.ulPointToReinitialize == iIndex) { ff.ulPointToReinitialize++; if (ff.ulPointToReinitialize >= FIREFALL_POINTS) ff.ulPointToReinitialize = 0; ffp.pixU = ff.pixU+(RNDW%ff.ulWidth); ffp.pixV -= _pixBufferHeight; ffp.ubSpeed = (RNDW&1)+2; } else { ffp.pixV -= _pixBufferHeight; } } } } ///////////////// Fire Fountain #define FIREFOUNTAIN_POINTS 100 struct FireFountain{ UBYTE pixU; UBYTE pixV; ULONG ulWidth; ULONG ulBaseHeight; ULONG ulRandomHeight; }; struct FireFountainPixel{ SWORD pixU; SWORD pixV; UBYTE pixLastU; UBYTE pixLastV; SWORD sbSpeedU; SWORD sbSpeedV; }; void InitializeFireFountain(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { FireFountain &ff = (*((FireFountain *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); ff.pixU = pixU0; ff.pixV = pixV0; // fountain width if (pixU0==pixU1) { ff.ulWidth = 31; } else { ff.ulWidth = abs(pixU1-pixU0)*2; } // fountain height if (pixV0==pixV1) { ff.ulBaseHeight = 120; ff.ulRandomHeight = 40; } else { ff.ulBaseHeight = abs(pixV1-pixV0)*3; ff.ulRandomHeight = abs(pixV1-pixV0); } // initialize fountain points ptes->tes_atepPixels.New(FIREFOUNTAIN_POINTS*2); for (INDEX iIndex=0; iIndex<FIREFOUNTAIN_POINTS*2; iIndex+=2) { FireFountainPixel &ffp = ((FireFountainPixel&) ptes->tes_atepPixels[iIndex]); ffp.pixU = (ff.pixU)<<6; ffp.pixV = (RNDW%(_pixBufferHeight-(_pixBufferHeight>>3))+(_pixBufferHeight>>3))<<6; ffp.pixLastU = (ffp.pixU)>>6; ffp.pixLastV = (ffp.pixV)>>6; ffp.sbSpeedU = 0; ffp.sbSpeedV = 0; } } void AnimateFireFountain(CTextureEffectSource *ptes) { FireFountain &ff = (*((FireFountain *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); // animate fountain points for (INDEX iIndex=0; iIndex<FIREFOUNTAIN_POINTS*2; iIndex+=2) { FireFountainPixel &ffp = ((FireFountainPixel&) ptes->tes_atepPixels[iIndex]); // fall from fountain PutPixelUBYTE_FIRE((ffp.pixU)>>6, (ffp.pixV)>>6, 200); PutPixelUBYTE_FIRE(ffp.pixLastU, ffp.pixLastV, 150); // move pixel ffp.pixLastU = (ffp.pixU)>>6; ffp.pixLastV = (ffp.pixV)>>6; ffp.pixU+=ffp.sbSpeedU; ffp.pixV-=ffp.sbSpeedV; ffp.sbSpeedV-=8; // when falled down reinitialize if ((ffp.pixV>>6) >= (_pixBufferHeight-5)) { ffp.pixU = (ff.pixU)<<6; ffp.pixV = (ff.pixV)<<6; ffp.pixLastU = (ffp.pixU)>>6; ffp.pixLastV = (ffp.pixV)>>6; ffp.sbSpeedU = (RNDW%ff.ulWidth)-(ff.ulWidth/2-1); ffp.sbSpeedV = (RNDW%ff.ulRandomHeight)+ff.ulBaseHeight; } } } ///////////////// Fire Fountain #define FIRESIDEFOUNTAIN_POINTS 100 struct FireSideFountain{ UBYTE pixU; UBYTE pixV; ULONG ulBaseWidth; ULONG ulRandomWidth; ULONG ulSide; }; struct FireSideFountainPixel{ SWORD pixU; SWORD pixV; UBYTE pixLastU; UBYTE pixLastV; SWORD sbSpeedU; SWORD sbSpeedV; }; void InitializeFireSideFountain(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { FireSideFountain &fsf = (*((FireSideFountain *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); fsf.pixU = pixU0; fsf.pixV = pixV0; // fountain width if (pixU0==pixU1) { fsf.ulBaseWidth = 80; fsf.ulRandomWidth = 40; fsf.ulSide = (pixU0>(_pixBufferWidth/2)); } else { fsf.ulBaseWidth = abs(pixU1-pixU0)*2; fsf.ulRandomWidth = abs(pixU1-pixU0); fsf.ulSide = (pixU1<pixU0); } // initialize fountain points ptes->tes_atepPixels.New(FIRESIDEFOUNTAIN_POINTS*2); for (INDEX iIndex=0; iIndex<FIRESIDEFOUNTAIN_POINTS*2; iIndex+=2) { FireSideFountainPixel &fsfp = ((FireSideFountainPixel&) ptes->tes_atepPixels[iIndex]); fsfp.pixU = (fsf.pixU)<<6; fsfp.pixV = (RNDW%(_pixBufferHeight-(_pixBufferHeight>>3))+(_pixBufferHeight>>3))<<6; fsfp.pixLastU = (fsfp.pixU)>>6; fsfp.pixLastV = (fsfp.pixV)>>6; fsfp.sbSpeedU = 0; fsfp.sbSpeedV = 0; } } void AnimateFireSideFountain(CTextureEffectSource *ptes) { FireSideFountain &fsf = (*((FireSideFountain *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); // animate fountain points for (INDEX iIndex=0; iIndex<FIRESIDEFOUNTAIN_POINTS*2; iIndex+=2) { FireSideFountainPixel &fsfp = ((FireSideFountainPixel&) ptes->tes_atepPixels[iIndex]); // fall from fountain PutPixelUBYTE_FIRE((fsfp.pixU)>>6, (fsfp.pixV)>>6, 200); PutPixelUBYTE_FIRE(fsfp.pixLastU, fsfp.pixLastV, 150); // move pixel fsfp.pixLastU = (fsfp.pixU)>>6; fsfp.pixLastV = (fsfp.pixV)>>6; fsfp.pixU+=fsfp.sbSpeedU; fsfp.pixV-=fsfp.sbSpeedV; fsfp.sbSpeedV-=8; // when falled down reinitialize if ((fsfp.pixV>>6) >= (_pixBufferHeight-5)) { fsfp.pixU = (fsf.pixU)<<6; fsfp.pixV = (fsf.pixV)<<6; fsfp.pixLastU = (fsfp.pixU)>>6; fsfp.pixLastV = (fsfp.pixV)>>6; fsfp.sbSpeedU = (RNDW%fsf.ulRandomWidth)+fsf.ulBaseWidth; if (fsf.ulSide) { fsfp.sbSpeedU = -fsfp.sbSpeedU; } fsfp.sbSpeedV = 0; } } } ///////////////// Fire Lightning struct FireLightning{ FLOAT fpixUFrom; FLOAT fpixVFrom; FLOAT fpixUTo; FLOAT fpixVTo; FLOAT fvU; FLOAT fvV; FLOAT fvNormalU; FLOAT fvNormalV; FLOAT fDistance; SLONG slCnt; }; void InitializeFireLightning(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { FireLightning &fl = (*((FireLightning *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); fl.fpixUFrom = (FLOAT) pixU0; fl.fpixVFrom = (FLOAT) pixV0; if (pixU0==pixU1 && pixV0==pixV1) { fl.fpixUTo = Abs((FLOAT)_pixBufferWidth -fl.fpixUFrom); fl.fpixVTo = Abs((FLOAT)_pixBufferHeight-fl.fpixVFrom); } else { fl.fpixUTo = (FLOAT) pixU1; fl.fpixVTo = (FLOAT) pixV1; } fl.fDistance = sqrt((fl.fpixUTo-fl.fpixUFrom)*(fl.fpixUTo-fl.fpixUFrom)+ (fl.fpixVTo-fl.fpixVFrom)*(fl.fpixVTo-fl.fpixVFrom)); // vector fl.fvU = (fl.fpixUTo-fl.fpixUFrom)/fl.fDistance; fl.fvV = (fl.fpixVTo-fl.fpixVFrom)/fl.fDistance; // normal vector fl.fvNormalU = -fl.fvV; fl.fvNormalV = fl.fvU; // frame counter fl.slCnt = 2; } void AnimateFireLightning(CTextureEffectSource *ptes) { FLOAT fU, fV, fLastU, fLastV; FLOAT fDU, fDV, fCnt; SLONG slRND; ULONG ulDist; FireLightning &fl = (*((FireLightning *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); // last point -> starting point fLastU = fl.fpixUFrom; fLastV = fl.fpixVFrom; fl.slCnt--; if (fl.slCnt == 0) { ulDist = 0; while ((FLOAT)ulDist<fl.fDistance) { // go away from source point to destination point ulDist += (RNDW%5)+5; if ((FLOAT)ulDist>=fl.fDistance) { // move point to line end fU = fl.fpixUTo; fV = fl.fpixVTo; } else { // move point on line fU = fl.fpixUFrom + fl.fvU*(FLOAT)ulDist; fV = fl.fpixVFrom + fl.fvV*(FLOAT)ulDist; // move point offset on normal line slRND = (SLONG) (RNDW%11)-5; fU += fl.fvNormalU*(FLOAT)slRND; fV += fl.fvNormalV*(FLOAT)slRND; } // draw line fDU = fU-fLastU; fDV = fV-fLastV; if (Abs(fDU)>Abs(fDV)) fCnt = Abs(fDU); else fCnt = Abs(fDV); fDU = fDU/fCnt; fDV = fDV/fCnt; while (fCnt>0.0f) { PutPixelUBYTE_FIRE((PIX) fLastU, (PIX) fLastV, 255); fLastU += fDU; fLastV += fDV; fCnt -= 1; } // store last point fLastU = fU; fLastV = fV; } fl.slCnt = 2; } } ///////////////// Fire Lightning Ball #define FIREBALL_LIGHTNINGS 2 struct FireLightningBall{ FLOAT fpixU; FLOAT fpixV; FLOAT fRadiusU; FLOAT fRadiusV; }; void InitializeFireLightningBall(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { FireLightningBall &flb = (*((FireLightningBall *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); flb.fpixU = (FLOAT) pixU0; flb.fpixV = (FLOAT) pixV0; if (pixU0==pixU1 && pixV0==pixV1) { flb.fRadiusU = 20; flb.fRadiusV = 20; } else { flb.fRadiusU = pixU1-pixU0; flb.fRadiusV = pixV1-pixV0; } } void AnimateFireLightningBall(CTextureEffectSource *ptes) { FLOAT fU, fV, fLastU, fLastV, fvU, fvV, fvNormalU, fvNormalV; FLOAT fDU, fDV, fCnt, fDistance; FLOAT fDestU, fDestV, fAngle; SLONG slRND; ULONG ulDist; FireLightningBall &flb = (*((FireLightningBall *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); for (int iBalls=0; iBalls<FIREBALL_LIGHTNINGS; iBalls++) { // last point -> starting point fLastU = flb.fpixU; fLastV = flb.fpixV; // destination point fAngle = (FLOAT) RNDW/10000; fDestU = flb.fpixU + flb.fRadiusU*cos(fAngle); fDestV = flb.fpixV + flb.fRadiusV*sin(fAngle); fDistance = sqrt((fDestU-fLastU)*(fDestU-fLastU)+ (fDestV-fLastV)*(fDestV-fLastV)); // vector fvU = (fDestU-fLastU)/fDistance; fvV = (fDestV-fLastV)/fDistance; // normal vector fvNormalU = -fvV; fvNormalV = fvU; ulDist = 0; while ((FLOAT)ulDist<fDistance) { // go away from source point to destination point ulDist += (RNDW%5)+5; if ((FLOAT)ulDist>=fDistance) { // move point on line fU = fDestU; fV = fDestV; } else { // move point on line fU = flb.fpixU + fvU*(FLOAT)ulDist; fV = flb.fpixV + fvV*(FLOAT)ulDist; // move point offset on normal line slRND = (SLONG) (RNDW%11)-5; fU += fvNormalU*(FLOAT)slRND; fV += fvNormalV*(FLOAT)slRND; } // draw line fDU = fU-fLastU; fDV = fV-fLastV; // counter if (Abs(fDU)>Abs(fDV)) fCnt = Abs(fDU); else fCnt = Abs(fDV); fDU = fDU/fCnt; fDV = fDV/fCnt; while (fCnt>0.0f) { PutPixelUBYTE_FIRE((PIX) fLastU, (PIX) fLastV, 255); fLastU += fDU; fLastV += fDV; fCnt -= 1; } // store last point fLastU = fU; fLastV = fV; } } } ///////////////// Fire Smoke #define SMOKE_POINTS 50 struct FireSmoke{ FLOAT fpixU; FLOAT fpixV; }; struct FireSmokePoint{ FLOAT fpixU; FLOAT fpixV; FLOAT fSpeedV; }; void InitializeFireSmoke(CTextureEffectSource *ptes, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { FireSmoke &fs = (*((FireSmoke *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); fs.fpixU = (FLOAT) pixU0; fs.fpixV = (FLOAT) pixV0; if (pixU0==pixU1 && pixV0==pixV1) { } else { } // initialize smoke points ptes->tes_atepPixels.New(SMOKE_POINTS*2); for (INDEX iIndex=0; iIndex<SMOKE_POINTS*2; iIndex+=2) { FireSmokePoint &fsp = ((FireSmokePoint&) ptes->tes_atepPixels[iIndex]); fsp.fpixU = FLOAT (pixU0 + (iIndex-(SMOKE_POINTS))/8); fsp.fpixV = FLOAT (pixV0); fsp.fSpeedV = 0.0f; } } void AnimateFireSmoke(CTextureEffectSource *ptes) { int iHeat; FLOAT fRatio = 32.0f / (FLOAT)_pixBufferHeight; UBYTE pixU, pixV; FireSmoke &fs = (*((FireSmoke *) ptes->tes_tespEffectSourceProperties.tesp_achDummy)); // animate smoke points for (INDEX iIndex=0; iIndex<SMOKE_POINTS*2; iIndex+=2) { FireSmokePoint &fsp = ((FireSmokePoint&) ptes->tes_atepPixels[iIndex]); pixU = RNDW%3 - 1; pixV = RNDW%3 - 1; if (fsp.fSpeedV<0.1f) { PutPixelUBYTE_FIRE((PIX) fsp.fpixU, (PIX) fsp.fpixV, RNDW%128); } else { iHeat = int(fsp.fpixV*fRatio+1); PutPixel25UBYTE_FIRE((PIX) fsp.fpixU+pixU, (PIX) fsp.fpixV+pixV, RNDW%iHeat); } // start moving up if (fsp.fSpeedV<0.1f && (RNDW&255)==0) { fsp.fSpeedV = 1.0f; } // move up fsp.fpixV -= fsp.fSpeedV; // at the end of texture go on bottom if (fsp.fpixV<=(FLOAT)_pixBufferHeight) { fsp.fpixV = fs.fpixV; fsp.fSpeedV = 0.0f; } } } ///////////////// Water void InitializeWater(void) { Randomize( (ULONG)(_pTimer->GetHighPrecisionTimer().GetMilliseconds())); } /******************************* Water Animation ********************************/ static void AnimateWater( SLONG slDensity) { _sfStats.StartTimer(CStatForm::STI_EFFECTRENDER); /////////////////////////////////// move water SWORD *pNew = (SWORD*)_ptdEffect->td_pubBuffer1; SWORD *pOld = (SWORD*)_ptdEffect->td_pubBuffer2; PIX pixV, pixU; PIX pixOffset, iNew; SLONG slLineAbove, slLineBelow, slLineLeft, slLineRight; // inner rectangle (without 1 pixel top and bottom line) pixOffset = _pixBufferWidth + 1; for( pixV=_pixBufferHeight-2; pixV>0; pixV--) { for( pixU=_pixBufferWidth; pixU>0; pixU--) { iNew = (( (SLONG)pOld[pixOffset - _pixBufferWidth] + (SLONG)pOld[pixOffset + _pixBufferWidth] + (SLONG)pOld[pixOffset - 1] + (SLONG)pOld[pixOffset + 1] ) >> 1) - (SLONG)pNew[pixOffset]; pNew[pixOffset] = iNew - (iNew >> slDensity); pixOffset++; } } // upper horizontal border (without corners) slLineAbove = ((_pixBufferHeight-1)*_pixBufferWidth) + 1; slLineBelow = _pixBufferWidth + 1; slLineLeft = 0; slLineRight = 2; pixOffset = 1; for( pixU=_pixBufferWidth-2; pixU>0; pixU--) { iNew = (( (SLONG)pOld[slLineAbove] + (SLONG)pOld[slLineBelow] + (SLONG)pOld[slLineLeft] + (SLONG)pOld[slLineRight] ) >> 1) - (SLONG)pNew[pixOffset]; pNew[pixOffset] = iNew - (iNew >> slDensity); slLineAbove++; slLineBelow++; slLineLeft++; slLineRight++; pixOffset++; } // lower horizontal border (without corners) slLineAbove = ((_pixBufferHeight-2)*_pixBufferWidth) + 1; slLineBelow = 1; slLineLeft = (_pixBufferHeight-1)*_pixBufferWidth; slLineRight = ((_pixBufferHeight-1)*_pixBufferWidth) + 2; pixOffset = ((_pixBufferHeight-1)*_pixBufferWidth) + 1; for( pixU=_pixBufferWidth-2; pixU>0; pixU--) { iNew = (( (SLONG)pOld[slLineAbove] + (SLONG)pOld[slLineBelow] + (SLONG)pOld[slLineLeft] + (SLONG)pOld[slLineRight] ) >> 1) - (SLONG)pNew[pixOffset]; pNew[pixOffset] = iNew - (iNew >> slDensity); slLineAbove++; slLineBelow++; slLineLeft++; slLineRight++; pixOffset++; } // corner ( 0, 0) iNew = (( (SLONG)pOld[_pixBufferWidth] + (SLONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] + (SLONG)pOld[1] + (SLONG)pOld[_pixBufferWidth-1] ) >> 1) - (SLONG)pNew[0]; pNew[0] = iNew - (iNew >> slDensity); // corner ( 0, _pixBufferWidth) iNew = (( (SLONG)pOld[(2*_pixBufferWidth) - 1] + (SLONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] + (SLONG)pOld[0] + (SLONG)pOld[_pixBufferWidth-2] ) >> 1) - (SLONG)pNew[_pixBufferWidth-1]; pNew[_pixBufferWidth-1] = iNew - (iNew >> slDensity); // corner ( _pixBufferHeight, 0) iNew = (( (SLONG)pOld[0] + (SLONG)pOld[(_pixBufferHeight-2)*_pixBufferWidth] + (SLONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) + 1] + (SLONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] ) >> 1) - (SLONG)pNew[(_pixBufferHeight-1)*_pixBufferWidth]; pNew[(_pixBufferHeight-1)*_pixBufferWidth] = iNew - (iNew >> slDensity); // corner ( _pixBufferHeight, _pixBufferWidth) iNew = (( (SLONG)pOld[_pixBufferWidth-1] + (SLONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) - 1] + (SLONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] + (SLONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 2] ) >> 1) - (SLONG)pNew[(_pixBufferHeight*_pixBufferWidth) - 1]; pNew[(_pixBufferHeight*_pixBufferWidth) - 1] = iNew - (iNew >> slDensity); // swap buffers Swap( _ptdEffect->td_pubBuffer1, _ptdEffect->td_pubBuffer2); _sfStats.StopTimer(CStatForm::STI_EFFECTRENDER); } //////////////////////////// displace texture #define PIXEL(u,v) pulTextureBase[ ((u)&(SLONG&)mmBaseWidthMask) + ((v)&(SLONG&)mmBaseHeightMask) *pixBaseWidth] ULONG _slHeightMapStep_renderWater = 0; PIX _pixBaseWidth_renderWater = 0; #pragma warning(disable: 4731) static void RenderWater(void) { _sfStats.StartTimer(CStatForm::STI_EFFECTRENDER); // get textures' parameters ULONG *pulTexture = _ptdEffect->td_pulFrames; PIX pixBaseWidth = _ptdBase->GetPixWidth(); PIX pixBaseHeight = _ptdBase->GetPixHeight(); ULONG *pulTextureBase = _ptdBase->td_pulFrames + GetMipmapOffset( _iWantedMipLevel, pixBaseWidth, pixBaseHeight); pixBaseWidth >>= _iWantedMipLevel; pixBaseHeight >>= _iWantedMipLevel; mmBaseWidthMask = pixBaseWidth -1; mmBaseHeightMask = pixBaseHeight-1; ASSERT( _ptdEffect->td_pulFrames!=NULL && _ptdBase->td_pulFrames!=NULL); SWORD *pswHeightMap = (SWORD*)_ptdEffect->td_pubBuffer1; // height map pointer // copy top 2 lines from height map to bottom (so no mask offset will be needed) memcpy( (void*)(pswHeightMap+(_pixBufferHeight*_pixBufferWidth)), (void*)pswHeightMap, _pixBufferWidth*sizeof(SWORD)*2); // execute corresponding displace routine if( _pixBufferWidth >= _pixTexWidth) { // SUB-SAMPLING SLONG slHeightMapStep, slHeightRowStep; #if (defined __MSVC_INLINE__) __asm { push ebx bsf ecx,D [_pixTexWidth] dec ecx mov eax,D [_pixBufferWidth] sar eax,cl mov D [slHeightMapStep],eax bsf edx,eax add edx,DISTORTION+2-1 mov D [mmShift],edx sub eax,2 imul eax,D [_pixBufferWidth] mov D [slHeightRowStep],eax mov eax,D [pixBaseWidth] mov edx,D [pixBaseHeight] shl edx,16 or eax,edx sub eax,0x00010001 mov D [mmBaseMasks],eax mov eax,D [pixBaseWidth] shl eax,16 or eax,1 mov D [mmBaseWidth],eax mov ebx,D [pswHeightMap] mov esi,D [pulTextureBase] mov edi,D [pulTexture] pxor mm6,mm6 // MM5 = 0 | 0 || pixV | pixU mov eax,D [_pixBufferWidth] mov edx,D [_pixTexHeight] rowLoop: push edx mov ecx,D [_pixTexWidth] pixLoop: movd mm1,D [ebx] movd mm3,D [ebx+ eax*2] movq mm2,mm1 psubw mm3,mm1 pslld mm1,16 psubw mm2,mm1 pand mm2,Q [mm00M0] por mm2,mm3 psraw mm2,Q [mmShift] paddw mm2,mm6 pand mm2,Q [mmBaseMasks] pmaddwd mm2,Q [mmBaseWidth] movd edx,mm2 mov edx,D [esi+ edx*4] mov D [edi],edx // advance to next texture pixel add ebx,D [slHeightMapStep] add edi,4 paddd mm6,Q [mm0001] dec ecx jnz pixLoop // advance to next texture row pop edx add ebx,D [slHeightRowStep] paddd mm6,Q [mm0010] dec edx jnz rowLoop emms pop ebx } #elif (defined __GNU_INLINE_X86_32__) // rcg12152001 needed extra registers. :( _slHeightMapStep_renderWater = slHeightMapStep; _pixBaseWidth_renderWater = pixBaseWidth; __asm__ __volatile__ ( // this sucks :( "movl %[pixBaseHeight], %%eax \n\t" "movl %[pswHeightMap], %%ecx \n\t" "movl %[pulTexture], %%edx \n\t" "movl %[pulTextureBase], %%esi \n\t" "movl %[slHeightRowStep], %%edi \n\t" "pushl %%ebx \n\t" // GCC needs this. "movl (" ASMSYM(_pixBaseWidth_renderWater) "),%%ebx \n\t" "pushl %%eax \n\t" // pixBaseHeight "pushl %%ebx \n\t" // pixBaseWidth "pushl %%ecx \n\t" // pswHeightMap "pushl %%edx \n\t" // pulTexture "pushl %%esi \n\t" // pulTextureBase "pushl %%edi \n\t" // slHeightRowStep "bsfl (" ASMSYM(_pixTexWidth) "), %%ecx \n\t" "decl %%ecx \n\t" "movl (" ASMSYM(_pixBufferWidth) "), %%eax \n\t" "sarl %%cl, %%eax \n\t" "movl %%eax, (" ASMSYM(_slHeightMapStep_renderWater) ") \n\t" "bsfl %%eax, %%edx \n\t" "addl $4, %%edx \n\t" "movl %%edx, (" ASMSYM(mmShift) ") \n\t" "subl $2, %%eax \n\t" "imul (" ASMSYM(_pixBufferWidth) "), %%eax \n\t" "movl %%eax, (%%esp) \n\t" // slHeightRowStep "movl 16(%%esp), %%eax \n\t" // pixBaseWidth "movl 20(%%esp), %%edx \n\t" // pixBaseHeight "shll $16, %%edx \n\t" "orl %%edx, %%eax \n\t" "subl $0x00010001, %%eax \n\t" "movl %%eax, (" ASMSYM(mmBaseMasks) ") \n\t" "movl 16(%%esp), %%eax \n\t" // pixBaseWidth "shl $16, %%eax \n\t" "orl $1, %%eax \n\t" "movl %%eax, (" ASMSYM(mmBaseWidth) ") \n\t" "movl 12(%%esp), %%ebx \n\t" // pswHeightMap "movl 4(%%esp), %%esi \n\t" // pulTextureBase "movl 8(%%esp), %%edi \n\t" // pulTexture "pxor %%mm6, %%mm6 \n\t" // MM5 = 0 | 0 || pixV | pixU "movl (" ASMSYM(_pixBufferWidth) "), %%eax \n\t" "movl (" ASMSYM(_pixTexHeight) "), %%edx \n\t" "0: \n\t" // rowLoop "pushl %%edx \n\t" "movl (" ASMSYM(_pixTexWidth) "), %%ecx \n\t" "1: \n\t" // pixLoop "movd (%%ebx), %%mm1 \n\t" "movd (%%ebx, %%eax, 2), %%mm3 \n\t" "movq %%mm1, %%mm2 \n\t" "psubw %%mm1, %%mm3 \n\t" "pslld $16, %%mm1 \n\t" "psubw %%mm1, %%mm2 \n\t" "pand (" ASMSYM(mm00M0) "), %%mm2 \n\t" "por %%mm3, %%mm2 \n\t" "psraw (" ASMSYM(mmShift) "), %%mm2 \n\t" "paddw %%mm6, %%mm2 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm2 \n\t" "pmaddwd (" ASMSYM(mmBaseWidth) "), %%mm2 \n\t" "movd %%mm2, %%edx \n\t" "movl (%%esi, %%edx, 4), %%edx \n\t" "movl %%edx, (%%edi) \n\t" // advance to next texture pixel "addl (" ASMSYM(_slHeightMapStep_renderWater) "), %%ebx \n\t" "addl $4, %%edi \n\t" "paddd (" ASMSYM(mm0001) "), %%mm6 \n\t" "decl %%ecx \n\t" "jnz 1b \n\t" // pixLoop // advance to next texture row "popl %%edx \n\t" "addl (%%esp), %%ebx \n\t" // slHeightRowStep "paddd (" ASMSYM(mm0010) "), %%mm6 \n\t" "decl %%edx \n\t" "jnz 0b \n\t" // rowLoop "addl $24, %%esp \n\t" // lose our locals... "popl %%ebx \n\t" // restore GCC's register. "emms \n\t" : // no outputs. : [pixBaseHeight] "g" (pixBaseHeight), [pswHeightMap] "g" (pswHeightMap), [pulTexture] "g" (pulTexture), [pulTextureBase] "g" (pulTextureBase), [slHeightRowStep] "g" (slHeightRowStep) : FPU_REGS, MMX_REGS, "eax", "ecx", "edx", "esi", "edi", "cc", "memory" ); #else PIX pixPos, pixDU, pixDV; slHeightMapStep = _pixBufferWidth/pixBaseWidth; slHeightRowStep = (slHeightMapStep-1)*_pixBufferWidth; mmShift = DISTORTION+ FastLog2(slHeightMapStep) +2; for( PIX pixV=0; pixV<_pixTexHeight; pixV++) { // row loop for( PIX pixU=0; pixU<_pixTexWidth; pixU++) { // texel loop pixPos = pswHeightMap[0]; pixDU = (pswHeightMap[1] - pixPos) >>(SLONG&)mmShift; pixDV = (pswHeightMap[_pixBufferWidth] - pixPos) >>(SLONG&)mmShift; pixDU = (pixU +pixDU) & (SLONG&)mmBaseWidthMask; pixDV = (pixV +pixDV) & (SLONG&)mmBaseHeightMask; *pulTexture++ = pulTextureBase[pixDV*pixBaseWidth + pixDU]; // advance to next texel in height map pswHeightMap += slHeightMapStep; } pswHeightMap += slHeightRowStep; } #endif } else if( _pixBufferWidth*2 == _pixTexWidth) { // BILINEAR SUPER-SAMPLING 2 #if ASMOPT == 1 #if (defined __MSVC_INLINE__) __asm { push ebx bsf eax,D [pixBaseWidth] mov edx,32 sub edx,eax mov D [mmBaseWidthShift],edx movq mm0,Q [mmBaseHeightMask] psllq mm0,32 por mm0,Q [mmBaseWidthMask] movq Q [mmBaseMasks],mm0 pxor mm6,mm6 // MM6 = pixV|pixU mov ebx,D [pswHeightMap] mov esi,D [pulTextureBase] mov edi,D [pulTexture] mov edx,D [_pixBufferHeight] rowLoop2: push edx mov edx,D [_pixTexWidth] mov ecx,D [_pixBufferWidth] pixLoop2: mov eax,D [_pixBufferWidth] movd mm1,D [ebx+ 2] movd mm0,D [ebx+ eax*2] psllq mm0,32 por mm1,mm0 movd mm0,D [ebx] punpckldq mm0,mm0 psubd mm1,mm0 movq mm0,mm6 pslld mm0,DISTORTION+1+1 paddd mm1,mm0 // MM1 = slV_00 | slU_00 movd mm2,D [ebx+ 4] movd mm0,D [ebx+ eax*2 +2] psllq mm0,32 por mm2,mm0 movd mm0,D [ebx+ 2] punpckldq mm0,mm0 psubd mm2,mm0 movq mm0,mm6 paddd mm0,Q [mm1LO] pslld mm0,DISTORTION+1+1 paddd mm2,mm0 // MM2 = slV_01 | slU_01 movd mm3,D [ebx+ eax*2 +2] movd mm0,D [ebx+ eax*4] psllq mm0,32 por mm3,mm0 movd mm0,D [ebx+ eax*2] punpckldq mm0,mm0 psubd mm3,mm0 movq mm0,mm6 paddd mm0,Q [mm1HI] pslld mm0,DISTORTION+1+1 paddd mm3,mm0 // MM3 = slV_10 | slU_10 movd mm4,D [ebx+ eax*2 +4] movd mm0,D [ebx+ eax*4 +2] psllq mm0,32 por mm4,mm0 movd mm0,D [ebx+ eax*2 +2] punpckldq mm0,mm0 psubd mm4,mm0 movq mm0,mm6 paddd mm0,Q [mm1HILO] pslld mm0,DISTORTION+1+1 paddd mm4,mm0 // MM4 = slV_11 | slU_11 movq mm0,mm1 psrad mm0,DISTORTION+1+0 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi],eax movq mm0,mm1 paddd mm0,mm2 psrad mm0,DISTORTION+1+1 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ 4],eax movq mm0,mm1 paddd mm0,mm3 psrad mm0,DISTORTION+1+1 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ edx*4],eax paddd mm1,mm2 paddd mm1,mm3 paddd mm1,mm4 psrad mm1,DISTORTION+1+2 pand mm1,Q [mmBaseMasks] movq mm7,mm1 psrlq mm7,Q [mmBaseWidthShift] paddd mm1,mm7 movd eax,mm1 mov eax,D [esi+ eax*4] mov D [edi+ edx*4 +4],eax // advance to next texture pixels paddd mm6,Q [mm1LO] add edi,8 add ebx,2 dec ecx jnz pixLoop2 // advance to next texture row lea edi,[edi+ edx*4] pop edx paddd mm6,Q [mm1HI] dec edx jnz rowLoop2 emms pop ebx } #elif (defined __GNU_INLINE_X86_32__) __asm__ __volatile__ ( "bsfl %[pixBaseWidth], %%eax \n\t" "movl $32, %%edx \n\t" "subl %%eax, %%edx \n\t" "movl %%edx, (" ASMSYM(mmBaseWidthShift) ") \n\t" "movq (" ASMSYM(mmBaseHeightMask) "), %%mm0 \n\t" "psllq $32, %%mm0 \n\t" "por (" ASMSYM(mmBaseWidthMask) "), %%mm0 \n\t" "movq %%mm0, (" ASMSYM(mmBaseMasks) ") \n\t" "pxor %%mm6, %%mm6 \n\t" // MM6 = pixV|pixU "movl %[pswHeightMap], %%edx \n\t" "movl %[pulTextureBase], %%esi \n\t" "movl %[pulTexture], %%edi \n\t" "pushl %%ebx \n\t" // GCC's register. "movl %%edx, %%ebx \n\t" "movl (" ASMSYM(_pixBufferHeight) "), %%edx \n\t" "0: \n\t" // rowLoop2 "pushl %%edx \n\t" "movl (" ASMSYM(_pixTexWidth) "), %%edx \n\t" "movl (" ASMSYM(_pixBufferWidth) "), %%ecx \n\t" "1: \n\t" // pixLoop2 "mov (" ASMSYM(_pixBufferWidth) "), %%eax \n\t" "movd 2(%%ebx), %%mm1 \n\t" "movd 0(%%ebx, %%eax, 2), %%mm0 \n\t" "psllq $32, %%mm0 \n\t" "por %%mm0, %%mm1 \n\t" "movd (%%ebx), %%mm0 \n\t" "punpckldq %%mm0, %%mm0 \n\t" "psubd %%mm0, %%mm1 \n\t" "movq %%mm6, %%mm0 \n\t" "pslld $5, %%mm0 \n\t" "paddd %%mm0, %%mm1 \n\t" // MM1 = slV_00 | slU_00 "movd 4(%%ebx), %%mm2 \n\t" "movd 2(%%ebx, %%eax, 2), %%mm0 \n\t" "psllq $32, %%mm0 \n\t" "por %%mm0, %%mm2 \n\t" "movd 2(%%ebx), %%mm0 \n\t" "punpckldq %%mm0, %%mm0 \n\t" "psubd %%mm0, %%mm2 \n\t" "movq %%mm6, %%mm0 \n\t" "paddd (" ASMSYM(mm1LO) "), %%mm0 \n\t" "pslld $5, %%mm0 \n\t" "paddd %%mm0, %%mm2 \n\t" // MM2 = slV_01 | slU_01 "movd 2(%%ebx, %%eax, 2), %%mm3 \n\t" "movd (%%ebx, %%eax, 4), %%mm0 \n\t" "psllq $32, %%mm0 \n\t" "por %%mm0, %%mm3 \n\t" "movd (%%ebx, %%eax, 2), %%mm0 \n\t" "punpckldq %%mm0, %%mm0 \n\t" "psubd %%mm0, %%mm3 \n\t" "movq %%mm6, %%mm0 \n\t" "paddd (" ASMSYM(mm1HI) "), %%mm0 \n\t" "pslld $5, %%mm0 \n\t" "paddd %%mm0, %%mm3 \n\t" // MM3 = slV_10 | slU_10 "movd 4(%%ebx, %%eax, 2), %%mm4 \n\t" "movd 2(%%ebx, %%eax, 4), %%mm0 \n\t" "psllq $32, %%mm0 \n\t" "por %%mm0, %%mm4 \n\t" "movd 2(%%ebx, %%eax, 2), %%mm0 \n\t" "punpckldq %%mm0, %%mm0 \n\t" "psubd %%mm0, %%mm4 \n\t" "movq %%mm6, %%mm0 \n\t" "paddd (" ASMSYM(mm1HILO) "), %%mm0 \n\t" "pslld $5, %%mm0 \n\t" "paddd %%mm0, %%mm4 \n\t" // MM4 = slV_11 | slU_11 "movq %%mm1, %%mm0 \n\t" "psrad $4, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, (%%edi) \n\t" "movq %%mm1, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "psrad $5, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, 4(%%edi) \n\t" "movq %%mm1, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "psrad $5, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, (%%edi, %%edx, 4) \n\t" "paddd %%mm2, %%mm1 \n\t" "paddd %%mm3, %%mm1 \n\t" "paddd %%mm4, %%mm1 \n\t" "psrad $6, %%mm1 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm1 \n\t" "movq %%mm1, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm1 \n\t" "movd %%mm1, %%eax \n\t" "mov (%%esi, %%eax, 4), %%eax \n\t" "mov %%eax, 4(%%edi, %%edx, 4) \n\t" // advance to next texture pixels "paddd (" ASMSYM(mm1LO) "), %%mm6 \n\t" "addl $8, %%edi \n\t" "addl $2, %%ebx \n\t" "decl %%ecx \n\t" "jnz 1b \n\t" // pixLoop2 // advance to next texture row "leal (%%edi, %%edx, 4), %%edi \n\t" "popl %%edx \n\t" "paddd (" ASMSYM(mm1HI) "), %%mm6 \n\t" "decl %%edx \n\t" "jnz 0b \n\t" // rowLoop2 "popl %%ebx \n\t" // GCC's value. "emms \n\t" : // no outputs. : [pixBaseWidth] "g" (pixBaseWidth), [pswHeightMap] "g" (pswHeightMap), [pulTextureBase] "g" (pulTextureBase), [pulTexture] "g" (pulTexture) : FPU_REGS, MMX_REGS, "eax", "ecx", "edx", "esi", "edi", "cc", "memory" ); #else #error fill in for you platform. #endif #else SLONG slU_00, slU_01, slU_10, slU_11; SLONG slV_00, slV_01, slV_10, slV_11; for( PIX pixV=0; pixV<_pixBufferHeight; pixV++) { // row loop for( PIX pixU=0; pixU<_pixBufferWidth; pixU++) { // texel loop slU_00 = pswHeightMap[_pixBufferWidth*0+1] - pswHeightMap[_pixBufferWidth*0+0] + ((pixU+0)<<(DISTORTION+1+1)); slV_00 = pswHeightMap[_pixBufferWidth*1+0] - pswHeightMap[_pixBufferWidth*0+0] + ((pixV+0)<<(DISTORTION+1+1)); slU_01 = pswHeightMap[_pixBufferWidth*0+2] - pswHeightMap[_pixBufferWidth*0+1] + ((pixU+1)<<(DISTORTION+1+1)); slV_01 = pswHeightMap[_pixBufferWidth*1+1] - pswHeightMap[_pixBufferWidth*0+1] + ((pixV+0)<<(DISTORTION+1+1)); slU_10 = pswHeightMap[_pixBufferWidth*1+1] - pswHeightMap[_pixBufferWidth*1+0] + ((pixU+0)<<(DISTORTION+1+1)); slV_10 = pswHeightMap[_pixBufferWidth*2+0] - pswHeightMap[_pixBufferWidth*1+0] + ((pixV+1)<<(DISTORTION+1+1)); slU_11 = pswHeightMap[_pixBufferWidth*1+2] - pswHeightMap[_pixBufferWidth*1+1] + ((pixU+1)<<(DISTORTION+1+1)); slV_11 = pswHeightMap[_pixBufferWidth*2+1] - pswHeightMap[_pixBufferWidth*1+1] + ((pixV+1)<<(DISTORTION+1+1)); pulTexture[_pixTexWidth*0+0] = PIXEL( (slU_00 ) >>(DISTORTION+1 ), (slV_00 ) >>(DISTORTION+1 ) ); pulTexture[_pixTexWidth*0+1] = PIXEL( (slU_00+slU_01 ) >>(DISTORTION+1+1), (slV_00+slV_01 ) >>(DISTORTION+1+1) ); pulTexture[_pixTexWidth*1+0] = PIXEL( (slU_00 +slU_10 ) >>(DISTORTION+1+1), (slV_00 +slV_10 ) >>(DISTORTION+1+1) ); pulTexture[_pixTexWidth*1+1] = PIXEL( (slU_00+slU_01+slU_10+slU_11) >>(DISTORTION+1+2), (slV_00+slV_01+slV_10+slV_11) >>(DISTORTION+1+2) ); // advance to next texel pulTexture+=2; pswHeightMap++; } pulTexture+=_pixTexWidth; } #endif } else if( _pixBufferWidth*4 == _pixTexWidth) { // BILINEAR SUPER-SAMPLING 4 #if ASMOPT == 1 #if (defined __MSVC_INLINE__) __asm { push ebx bsf eax,D [pixBaseWidth] mov edx,32 sub edx,eax mov D [mmBaseWidthShift],edx movq mm0,Q [mmBaseHeightMask] psllq mm0,32 por mm0,Q [mmBaseWidthMask] movq Q [mmBaseMasks],mm0 pxor mm6,mm6 // MM6 = pixV|pixU mov ebx,D [pswHeightMap] mov esi,D [pulTextureBase] mov edi,D [pulTexture] mov edx,D [_pixBufferHeight] rowLoop4: push edx mov ecx,D [_pixBufferWidth] pixLoop4: mov eax,D [_pixBufferWidth] mov edx,D [_pixTexWidth] movd mm1,D [ebx+ 2] movd mm0,D [ebx+ eax*2] psllq mm0,32 por mm1,mm0 movd mm0,D [ebx] punpckldq mm0,mm0 psubd mm1,mm0 movq mm0,mm6 pslld mm0,DISTORTION+1+1 paddd mm1,mm0 // MM1 = slV_00 | slU_00 movd mm2,D [ebx+ 4] movd mm0,D [ebx+ eax*2 +2] psllq mm0,32 por mm2,mm0 movd mm0,D [ebx+ 2] punpckldq mm0,mm0 psubd mm2,mm0 movq mm0,mm6 paddd mm0,Q [mm1LO] pslld mm0,DISTORTION+1+1 paddd mm2,mm0 // MM2 = slV_01 | slU_01 movd mm3,D [ebx+ eax*2 +2] movd mm0,D [ebx+ eax*4] psllq mm0,32 por mm3,mm0 movd mm0,D [ebx+ eax*2] punpckldq mm0,mm0 psubd mm3,mm0 movq mm0,mm6 paddd mm0,Q [mm1HI] pslld mm0,DISTORTION+1+1 paddd mm3,mm0 // MM3 = slV_10 | slU_10 movd mm4,D [ebx+ eax*2 +4] movd mm0,D [ebx+ eax*4 +2] psllq mm0,32 por mm4,mm0 movd mm0,D [ebx+ eax*2 +2] punpckldq mm0,mm0 psubd mm4,mm0 movq mm0,mm6 paddd mm0,Q [mm1HILO] pslld mm0,DISTORTION+1+1 paddd mm4,mm0 // MM4 = slV_11 | slU_11 // texel 00 movq mm0,mm1 psrad mm0,DISTORTION pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi],eax // texel 01 movq mm0,mm1 paddd mm0,mm1 paddd mm0,mm1 paddd mm0,mm2 psrad mm0,DISTORTION+2 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi +4],eax // texel 02 movq mm0,mm1 paddd mm0,mm2 psrad mm0,DISTORTION+1 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi +8],eax // texel 03 movq mm0,mm1 paddd mm0,mm2 paddd mm0,mm2 paddd mm0,mm2 psrad mm0,DISTORTION+2 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi +12],eax // texel 10 movq mm0,mm1 paddd mm0,mm1 paddd mm0,mm1 paddd mm0,mm3 psrad mm0,DISTORTION+2 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ edx*4],eax // texel 11 movq mm0,mm1 pslld mm0,3 paddd mm0,mm1 paddd mm0,mm2 paddd mm0,mm2 paddd mm0,mm2 paddd mm0,mm3 paddd mm0,mm3 paddd mm0,mm3 paddd mm0,mm4 psrad mm0,DISTORTION+4 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ edx*4 +4],eax // texel 12 movq mm0,mm1 paddd mm0,mm0 paddd mm0,mm1 paddd mm0,mm2 paddd mm0,mm2 paddd mm0,mm2 paddd mm0,mm3 paddd mm0,mm4 psrad mm0,DISTORTION+3 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ edx*4 +8],eax // texel 13 movq mm0,mm2 pslld mm0,3 paddd mm0,mm2 paddd mm0,mm1 paddd mm0,mm1 paddd mm0,mm1 paddd mm0,mm3 paddd mm0,mm4 paddd mm0,mm4 paddd mm0,mm4 psrad mm0,DISTORTION+4 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ edx*4 +12],eax // texel 20 movq mm0,mm1 paddd mm0,mm3 psrad mm0,DISTORTION+1 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ edx*8],eax // texel 21 movq mm0,mm1 paddd mm0,mm1 paddd mm0,mm1 paddd mm0,mm2 paddd mm0,mm3 paddd mm0,mm3 paddd mm0,mm3 paddd mm0,mm4 psrad mm0,DISTORTION+3 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ edx*8 +4],eax // texel 22 movq mm0,mm1 paddd mm0,mm2 paddd mm0,mm3 paddd mm0,mm4 psrad mm0,DISTORTION+2 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ edx*8 +8],eax // texel 23 movq mm0,mm1 paddd mm0,mm2 paddd mm0,mm2 paddd mm0,mm2 paddd mm0,mm3 paddd mm0,mm4 paddd mm0,mm4 paddd mm0,mm4 psrad mm0,DISTORTION+3 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ edx*8 +12],eax imul edx,3 // _pixTexWidth*=3 // texel 30 movq mm0,mm1 paddd mm0,mm3 paddd mm0,mm3 paddd mm0,mm3 psrad mm0,DISTORTION+2 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ edx*4],eax // texel 31 movq mm0,mm3 pslld mm0,3 paddd mm0,mm3 paddd mm0,mm1 paddd mm0,mm1 paddd mm0,mm1 paddd mm0,mm2 paddd mm0,mm4 paddd mm0,mm4 paddd mm0,mm4 psrad mm0,DISTORTION+4 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ edx*4 +4],eax // texel 32 movq mm0,mm4 paddd mm0,mm0 paddd mm0,mm4 paddd mm0,mm3 paddd mm0,mm3 paddd mm0,mm3 paddd mm0,mm2 paddd mm0,mm1 psrad mm0,DISTORTION+3 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ edx*4 +8],eax // texel 33 movq mm0,mm4 pslld mm0,3 paddd mm0,mm4 paddd mm0,mm1 paddd mm0,mm2 paddd mm0,mm2 paddd mm0,mm2 paddd mm0,mm3 paddd mm0,mm3 paddd mm0,mm3 psrad mm0,DISTORTION+4 pand mm0,Q [mmBaseMasks] movq mm7,mm0 psrlq mm7,Q [mmBaseWidthShift] paddd mm0,mm7 movd eax,mm0 mov eax,D [esi+ eax*4] mov D [edi+ edx*4 +12],eax // advance to next texture pixels paddd mm6,Q [mm1LO] add edi,16 add ebx,2 dec ecx jnz pixLoop4 // advance to next texture row lea edi,[edi+ edx*4] // +=[_pixTexWidth]*3 pop edx paddd mm6,Q [mm1HI] dec edx jnz rowLoop4 emms pop ebx } #elif (defined __GNU_INLINE_X86_32__) __asm__ __volatile__ ( "bsfl %[pixBaseWidth], %%eax \n\t" "movl $32, %%edx \n\t" "subl %%eax, %%edx \n\t" "movl %%edx, (" ASMSYM(mmBaseWidthShift) ") \n\t" "movq (" ASMSYM(mmBaseHeightMask) "), %%mm0 \n\t" "psllq $32, %%mm0 \n\t" "por (" ASMSYM(mmBaseWidthMask) "), %%mm0 \n\t" "movq %%mm0, (" ASMSYM(mmBaseMasks) ") \n\t" "pxor %%mm6, %%mm6 \n\t" // MM6 = pixV|pixU "movl %[pswHeightMap], %%edx \n\t" "movl %[pulTextureBase], %%esi \n\t" "movl %[pulTexture], %%edi \n\t" "pushl %%ebx \n\t" // GCC's register. "movl %%edx, %%ebx \n\t" "movl (" ASMSYM(_pixBufferHeight) "), %%edx \n\t" "0: \n\t" // rowLoop4 "pushl %%edx \n\t" "movl (" ASMSYM(_pixBufferWidth) "), %%ecx \n\t" "1: \n\t" // pixLoop4 "movl (" ASMSYM(_pixBufferWidth) "), %%eax \n\t" "movl (" ASMSYM(_pixTexWidth) "), %%edx \n\t" "movd 2(%%ebx), %%mm1 \n\t" "movd (%%ebx, %%eax, 2), %%mm0 \n\t" "psllq $32, %%mm0 \n\t" "por %%mm0, %%mm1 \n\t" "movd (%%ebx), %%mm0 \n\t" "punpckldq %%mm0, %%mm0 \n\t" "psubd %%mm0, %%mm1 \n\t" "movq %%mm6, %%mm0 \n\t" "pslld $5, %%mm0 \n\t" "paddd %%mm0, %%mm1 \n\t" // MM1 = slV_00 | slU_00 "movd 4(%%ebx), %%mm2 \n\t" "movd 2(%%ebx, %%eax, 2), %%mm0 \n\t" "psllq $32, %%mm0 \n\t" "por %%mm0, %%mm2 \n\t" "movd 2(%%ebx), %%mm0 \n\t" "punpckldq %%mm0, %%mm0 \n\t" "psubd %%mm0, %%mm2 \n\t" "movq %%mm6, %%mm0 \n\t" "paddd (" ASMSYM(mm1LO) "), %%mm0 \n\t" "pslld $5, %%mm0 \n\t" "paddd %%mm0, %%mm2 \n\t" // MM2 = slV_01 | slU_01 "movd 2(%%ebx, %%eax, 2), %%mm3 \n\t" "movd (%%ebx, %%eax, 4), %%mm0 \n\t" "psllq $32, %%mm0 \n\t" "por %%mm0, %%mm3 \n\t" "movd (%%ebx, %%eax, 2), %%mm0 \n\t" "punpckldq %%mm0, %%mm0 \n\t" "psubd %%mm0, %%mm3 \n\t" "movq %%mm6, %%mm0 \n\t" "paddd (" ASMSYM(mm1HI) "), %%mm0 \n\t" "pslld $5, %%mm0 \n\t" "paddd %%mm0, %%mm3 \n\t" // MM3 = slV_10 | slU_10 "movd 4(%%ebx, %%eax, 2), %%mm4 \n\t" "movd 2(%%ebx, %%eax, 4), %%mm0 \n\t" "psllq $32, %%mm0 \n\t" "por %%mm0, %%mm4 \n\t" "movd 2(%%ebx, %%eax, 2), %%mm0 \n\t" "punpckldq %%mm0, %%mm0 \n\t" "psubd %%mm0, %%mm4 \n\t" "movq %%mm6, %%mm0 \n\t" "paddd (" ASMSYM(mm1HILO) "), %%mm0 \n\t" "pslld $5, %%mm0 \n\t" "paddd %%mm0, %%mm4 \n\t" // MM4 = slV_11 | slU_11 // texel 00 "movq %%mm1, %%mm0 \n\t" "psrad $3, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, (%%edi) \n\t" // texel 01 "movq %%mm1, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "psrad $5, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, 4(%%edi) \n\t" // texel 02 "movq %%mm1, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "psrad $4, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, 8(%%edi) \n\t" // texel 03 "movq %%mm1, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "psrad $5, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, 12(%%edi) \n\t" // texel 10 "movq %%mm1, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "psrad $5, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, (%%edi, %%edx, 4) \n\t" // texel 11 "movq %%mm1, %%mm0 \n\t" "pslld $3, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "psrad $7, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, 4(%%edi, %%edx, 4) \n\t" // texel 12 "movq %%mm1, %%mm0 \n\t" "paddd %%mm0, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "psrad $6, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, 8(%%edi, %%edx, 4) \n\t" // texel 13 "movq %%mm2, %%mm0 \n\t" "pslld $3, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "psrad $7, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, 12(%%edi, %%edx, 4) \n\t" // texel 20 "movq %%mm1, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "psrad $4, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, (%%edi, %%edx, 8) \n\t" // texel 21 "movq %%mm1, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "psrad $6, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, 4(%%edi, %%edx, 8) \n\t" // texel 22 "movq %%mm1, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "psrad $5, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, 8(%%edi, %%edx, 8) \n\t" // texel 23 "movq %%mm1, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "psrad $6, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, 12(%%edi, %%edx, 8) \n\t" "imull $3, %%edx \n\t" // _pixTexWidth*=3 // texel 30 "movq %%mm1, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "psrad $5, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, (%%edi, %%edx, 4) \n\t" // texel 31 "movq %%mm3, %%mm0 \n\t" "pslld $3, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "psrad $7, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, 4(%%edi, %%edx, 4) \n\t" // texel 32 "movq %%mm4, %%mm0 \n\t" "paddd %%mm0, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "psrad $6, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, 8(%%edi, %%edx, 4) \n\t" // texel 33 "movq %%mm4, %%mm0 \n\t" "pslld $3, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t" "paddd %%mm1, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm2, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "paddd %%mm3, %%mm0 \n\t" "psrad $7, %%mm0 \n\t" "pand (" ASMSYM(mmBaseMasks) "), %%mm0 \n\t" "movq %%mm0, %%mm7 \n\t" "psrlq (" ASMSYM(mmBaseWidthShift) "), %%mm7 \n\t" "paddd %%mm7, %%mm0 \n\t" "movd %%mm0, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, 12(%%edi, %%edx, 4) \n\t" // advance to next texture pixels "paddd (" ASMSYM(mm1LO) "), %%mm6 \n\t" "addl $16, %%edi \n\t" "addl $2, %%ebx \n\t" "decl %%ecx \n\t" "jnz 1b \n\t" // pixLoop4 // advance to next texture row "leal (%%edi, %%edx, 4), %%edi \n\t"// +=[_pixTexWidth]*3 "popl %%edx \n\t" "paddd (" ASMSYM(mm1HI) "), %%mm6 \n\t" "decl %%edx \n\t" "jnz 0b \n\t" // rowLoop4 "popl %%ebx \n\t" // Restore GCC's value. "emms \n\t" : // no outputs. : [pixBaseWidth] "g" (pixBaseWidth), [pswHeightMap] "g" (pswHeightMap), [pulTextureBase] "g" (pulTextureBase), [pulTexture] "g" (pulTexture) : FPU_REGS, MMX_REGS, "eax", "ecx", "edx", "esi", "edi", "cc", "memory" ); #else #error fill in for you platform. #endif #else SLONG slU_00, slU_01, slU_10, slU_11; SLONG slV_00, slV_01, slV_10, slV_11; mmBaseWidthShift = FastLog2( pixBaseWidth); // faster multiplying with shift for( PIX pixV=0; pixV<_pixBufferHeight; pixV++) { // row loop for( PIX pixU=0; pixU<_pixBufferWidth; pixU++) { // texel loop slU_00 = pswHeightMap[_pixBufferWidth*0+1] - pswHeightMap[_pixBufferWidth*0+0] + ((pixU+0)<<(DISTORTION+2)); slV_00 = pswHeightMap[_pixBufferWidth*1+0] - pswHeightMap[_pixBufferWidth*0+0] + ((pixV+0)<<(DISTORTION+2)); slU_01 = pswHeightMap[_pixBufferWidth*0+2] - pswHeightMap[_pixBufferWidth*0+1] + ((pixU+1)<<(DISTORTION+2)); slV_01 = pswHeightMap[_pixBufferWidth*1+1] - pswHeightMap[_pixBufferWidth*0+1] + ((pixV+0)<<(DISTORTION+2)); slU_10 = pswHeightMap[_pixBufferWidth*1+1] - pswHeightMap[_pixBufferWidth*1+0] + ((pixU+0)<<(DISTORTION+2)); slV_10 = pswHeightMap[_pixBufferWidth*2+0] - pswHeightMap[_pixBufferWidth*1+0] + ((pixV+1)<<(DISTORTION+2)); slU_11 = pswHeightMap[_pixBufferWidth*1+2] - pswHeightMap[_pixBufferWidth*1+1] + ((pixU+1)<<(DISTORTION+2)); slV_11 = pswHeightMap[_pixBufferWidth*2+1] - pswHeightMap[_pixBufferWidth*1+1] + ((pixV+1)<<(DISTORTION+2)); pulTexture[_pixTexWidth*0+0] = PIXEL( (slU_00 ) >>(DISTORTION ), (slV_00 ) >>(DISTORTION ) ); pulTexture[_pixTexWidth*0+1] = PIXEL( (slU_00* 3+slU_01* 1 ) >>(DISTORTION+2), (slV_00* 3+slV_01* 1 ) >>(DISTORTION+2) ); pulTexture[_pixTexWidth*0+2] = PIXEL( (slU_00 +slU_01 ) >>(DISTORTION+1), (slV_00 +slV_01 ) >>(DISTORTION+1) ); pulTexture[_pixTexWidth*0+3] = PIXEL( (slU_00* 1+slU_01* 3 ) >>(DISTORTION+2), (slV_00* 1+slV_01* 3 ) >>(DISTORTION+2) ); pulTexture[_pixTexWidth*1+0] = PIXEL( (slU_00* 3 +slU_10* 1 ) >>(DISTORTION+2), (slV_00* 3 +slV_10 ) >>(DISTORTION+2) ); pulTexture[_pixTexWidth*1+1] = PIXEL( (slU_00* 9+slU_01* 3+slU_10* 3+slU_11* 1) >>(DISTORTION+4), (slV_00* 9+slV_01* 3+slV_10* 3+slV_11* 1) >>(DISTORTION+4) ); pulTexture[_pixTexWidth*1+2] = PIXEL( (slU_00* 3+slU_01* 3+slU_10* 1+slU_11* 1) >>(DISTORTION+3), (slV_00* 3+slV_01* 3+slV_10* 1+slV_11* 1) >>(DISTORTION+3) ); pulTexture[_pixTexWidth*1+3] = PIXEL( (slU_00* 3+slU_01* 9+slU_10* 1+slU_11* 3) >>(DISTORTION+4), (slV_00* 3+slV_01* 9+slV_10* 1+slV_11* 3) >>(DISTORTION+4) ); pulTexture[_pixTexWidth*2+0] = PIXEL( (slU_00 +slU_10 ) >>(DISTORTION+1), (slV_00 +slV_10 ) >>(DISTORTION+1) ); pulTexture[_pixTexWidth*2+1] = PIXEL( (slU_00* 3+slU_01* 1+slU_10* 3+slU_11* 1) >>(DISTORTION+3), (slV_00* 3+slV_01* 1+slV_10* 3+slV_11* 1) >>(DISTORTION+3) ); pulTexture[_pixTexWidth*2+2] = PIXEL( (slU_00 +slU_01 +slU_10 +slU_11 ) >>(DISTORTION+2), (slV_00 +slV_01 +slV_10 +slV_11 ) >>(DISTORTION+2) ); pulTexture[_pixTexWidth*2+3] = PIXEL( (slU_00* 1+slU_01* 3+slU_10* 1+slU_11* 3) >>(DISTORTION+3), (slV_00* 1+slV_01* 3+slV_10* 1+slV_11* 3) >>(DISTORTION+3) ); pulTexture[_pixTexWidth*3+0] = PIXEL( (slU_00* 1 +slU_10* 3 ) >>(DISTORTION+2), (slV_00* 1 +slV_10* 3 ) >>(DISTORTION+2) ); pulTexture[_pixTexWidth*3+1] = PIXEL( (slU_00* 3+slU_01* 1+slU_10* 9+slU_11* 3) >>(DISTORTION+4), (slV_00* 3+slV_01* 1+slV_10* 9+slV_11* 3) >>(DISTORTION+4) ); pulTexture[_pixTexWidth*3+2] = PIXEL( (slU_00* 1+slU_01* 1+slU_10* 3+slU_11* 3) >>(DISTORTION+3), (slV_00* 1+slV_01* 1+slV_10* 3+slV_11* 3) >>(DISTORTION+3) ); pulTexture[_pixTexWidth*3+3] = PIXEL( (slU_00* 1+slU_01* 3+slU_10* 3+slU_11* 9) >>(DISTORTION+4), (slV_00* 1+slV_01* 3+slV_10* 3+slV_11* 9) >>(DISTORTION+4) ); // advance to next texel pulTexture+=4; pswHeightMap++; } pulTexture+=_pixTexWidth*3; } #endif } else { // DO NOTHING ASSERTALWAYS( "Effect textures larger than 256 pixels aren't supported"); } _sfStats.StopTimer(CStatForm::STI_EFFECTRENDER); } #pragma warning(default: 4731) ///////////////// Fire void InitializeFire(void) { Randomize( (ULONG)(_pTimer->GetHighPrecisionTimer().GetMilliseconds())); } enum PlasmaType { ptNormal = 0, ptUp, ptUpTile, ptDown, ptDownTile }; /******************************* Plasma Animation ********************************/ static void AnimatePlasma( SLONG slDensity, PlasmaType eType) { _sfStats.StartTimer(CStatForm::STI_EFFECTRENDER); /////////////////////////////////// move plasma UBYTE *pNew = (UBYTE*)_ptdEffect->td_pubBuffer1; UBYTE *pOld = (UBYTE*)_ptdEffect->td_pubBuffer2; PIX pixV, pixU; PIX pixOffset; SLONG slLineAbove, slLineBelow, slLineLeft, slLineRight; ULONG ulNew; // -------------------------- // Normal plasma // -------------------------- if (eType == ptNormal) { // inner rectangle (without 1 pixel border) pixOffset = _pixBufferWidth; for( pixV=1; pixV<_pixBufferHeight-1; pixV++) { for( pixU=0; pixU<_pixBufferWidth; pixU++) { ulNew = ((((ULONG)pOld[pixOffset - _pixBufferWidth] + (ULONG)pOld[pixOffset + _pixBufferWidth] + (ULONG)pOld[pixOffset - 1] + (ULONG)pOld[pixOffset + 1] )>>2) + (ULONG)pOld[pixOffset] )>>1; pNew[pixOffset] = ulNew - (ulNew >> slDensity); pixOffset++; } } // upper horizontal border (without corners) slLineAbove = ((_pixBufferHeight-1)*_pixBufferWidth) + 1; slLineBelow = _pixBufferWidth + 1; slLineLeft = 0; slLineRight = 2; pixOffset = 1; for( pixU=_pixBufferWidth-2; pixU>0; pixU--) { ulNew = ((((ULONG)pOld[slLineAbove] + (ULONG)pOld[slLineBelow] + (ULONG)pOld[slLineLeft] + (ULONG)pOld[slLineRight] )>>2) + (ULONG)pOld[pixOffset] )>>1; pNew[pixOffset] = ulNew - (ulNew >> slDensity); slLineAbove++; slLineBelow++; slLineLeft++; slLineRight++; pixOffset++; } // lower horizontal border (without corners) slLineAbove = ((_pixBufferHeight-2)*_pixBufferWidth) + 1; slLineBelow = 1; slLineLeft = (_pixBufferHeight-1)*_pixBufferWidth; slLineRight = ((_pixBufferHeight-1)*_pixBufferWidth) + 2; pixOffset = ((_pixBufferHeight-1)*_pixBufferWidth) + 1; for( pixU=_pixBufferWidth-2; pixU>0; pixU--) { ulNew = ((((ULONG)pOld[slLineAbove] + (ULONG)pOld[slLineBelow] + (ULONG)pOld[slLineLeft] + (ULONG)pOld[slLineRight] )>>2) + (ULONG)pOld[pixOffset] )>>1; pNew[pixOffset] = ulNew - (ulNew >> slDensity); slLineAbove++; slLineBelow++; slLineLeft++; slLineRight++; pixOffset++; } // corner ( 0, 0) ulNew = ((((ULONG)pOld[_pixBufferWidth] + (ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] + (ULONG)pOld[1] + (ULONG)pOld[_pixBufferWidth-1] )>>2) + (ULONG)pOld[0] )>>1; pNew[0] = ulNew - (ulNew >> slDensity); // corner ( 0, _pixBufferWidth) ulNew = ((((ULONG)pOld[(2*_pixBufferWidth) - 1] + (ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] + (ULONG)pOld[0] + (ULONG)pOld[_pixBufferWidth-2] )>>2) + (ULONG)pOld[_pixBufferWidth-1] )>>1; pNew[_pixBufferWidth-1] = ulNew - (ulNew >> slDensity); // corner ( _pixBufferHeight, 0) ulNew = ((((ULONG)pOld[0] + (ULONG)pOld[(_pixBufferHeight-2)*_pixBufferWidth] + (ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) + 1] + (ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] )>>2) + (ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] )>>1; pNew[(_pixBufferHeight-1)*_pixBufferWidth] = ulNew - (ulNew >> slDensity); // corner ( _pixBufferHeight, _pixBufferWidth) ulNew = ((((ULONG)pOld[_pixBufferWidth-1] + (ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) - 1] + (ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] + (ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 2] )>>2) + (ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] )>>1; pNew[(_pixBufferHeight*_pixBufferWidth) - 1] = ulNew - (ulNew >> slDensity); // -------------------------- // Plasma going up // -------------------------- } else if (eType==ptUp || eType==ptUpTile) { // inner rectangle (without 1 pixel border) pixOffset = _pixBufferWidth; for( pixV=1; pixV<_pixBufferHeight-1; pixV++) { for( pixU=0; pixU<_pixBufferWidth; pixU++) { ulNew = ((((ULONG)pOld[pixOffset - _pixBufferWidth] + (ULONG)pOld[pixOffset + _pixBufferWidth] + (ULONG)pOld[pixOffset - 1] + (ULONG)pOld[pixOffset + 1] )>>2) + (ULONG)pOld[pixOffset] )>>1; pNew[pixOffset-_pixBufferWidth] = ulNew - (ulNew >> slDensity); pixOffset++; } } // tile if (eType==ptUpTile) { // upper horizontal border (without corners) slLineAbove = ((_pixBufferHeight-1)*_pixBufferWidth) + 1; slLineBelow = _pixBufferWidth + 1; slLineLeft = 0; slLineRight = 2; pixOffset = 1; for( pixU=_pixBufferWidth-2; pixU>0; pixU--) { ulNew = ((((ULONG)pOld[slLineAbove] + (ULONG)pOld[slLineBelow] + (ULONG)pOld[slLineLeft] + (ULONG)pOld[slLineRight] )>>2) + (ULONG)pOld[pixOffset] )>>1; pNew[slLineAbove] = ulNew - (ulNew >> slDensity); slLineAbove++; slLineBelow++; slLineLeft++; slLineRight++; pixOffset++; } // lower horizontal border (without corners) slLineAbove = ((_pixBufferHeight-2)*_pixBufferWidth) + 1; slLineBelow = 1; slLineLeft = (_pixBufferHeight-1)*_pixBufferWidth; slLineRight = ((_pixBufferHeight-1)*_pixBufferWidth) + 2; pixOffset = ((_pixBufferHeight-1)*_pixBufferWidth) + 1; for( pixU=_pixBufferWidth-2; pixU>0; pixU--) { ulNew = ((((ULONG)pOld[slLineAbove] + (ULONG)pOld[slLineBelow] + (ULONG)pOld[slLineLeft] + (ULONG)pOld[slLineRight] )>>2) + (ULONG)pOld[pixOffset] )>>1; pNew[slLineAbove] = ulNew - (ulNew >> slDensity); slLineAbove++; slLineBelow++; slLineLeft++; slLineRight++; pixOffset++; } // corner ( 0, 0) ulNew = ((((ULONG)pOld[_pixBufferWidth] + (ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] + (ULONG)pOld[1] + (ULONG)pOld[_pixBufferWidth-1] )>>2) + (ULONG)pOld[0] )>>1; pNew[(_pixBufferHeight-1)*_pixBufferWidth] = ulNew - (ulNew >> slDensity); // corner ( 0, _pixBufferWidth) ulNew = ((((ULONG)pOld[(2*_pixBufferWidth) - 1] + (ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] + (ULONG)pOld[0] + (ULONG)pOld[_pixBufferWidth-2] )>>2) + (ULONG)pOld[_pixBufferWidth-1] )>>1; pNew[(_pixBufferHeight*_pixBufferWidth) - 1] = ulNew - (ulNew >> slDensity); // corner ( _pixBufferHeight, 0) ulNew = ((((ULONG)pOld[0] + (ULONG)pOld[(_pixBufferHeight-2)*_pixBufferWidth] + (ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) + 1] + (ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] )>>2) + (ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] )>>1; pNew[(_pixBufferHeight-2)*_pixBufferWidth] = ulNew - (ulNew >> slDensity); // corner ( _pixBufferHeight, _pixBufferWidth) ulNew = ((((ULONG)pOld[_pixBufferWidth-1] + (ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) - 1] + (ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] + (ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 2] )>>2) + (ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] )>>1; pNew[((_pixBufferHeight-1)*_pixBufferWidth) - 1] = ulNew - (ulNew >> slDensity); } // -------------------------- // Plasma going down // -------------------------- } else if (eType==ptDown || eType==ptDownTile) { // inner rectangle (without 1 pixel border) pixOffset = _pixBufferWidth; for( pixV=1; pixV<_pixBufferHeight-1; pixV++) { for( pixU=0; pixU<_pixBufferWidth; pixU++) { ulNew = ((((ULONG)pOld[pixOffset - _pixBufferWidth] + (ULONG)pOld[pixOffset + _pixBufferWidth] + (ULONG)pOld[pixOffset - 1] + (ULONG)pOld[pixOffset + 1] )>>2) + (ULONG)pOld[pixOffset] )>>1; pNew[pixOffset+_pixBufferWidth] = ulNew - (ulNew >> slDensity); pixOffset++; } } // tile if (eType==ptDownTile) { // upper horizontal border (without corners) slLineAbove = ((_pixBufferHeight-1)*_pixBufferWidth) + 1; slLineBelow = _pixBufferWidth + 1; slLineLeft = 0; slLineRight = 2; pixOffset = 1; for( pixU=_pixBufferWidth-2; pixU>0; pixU--) { ulNew = ((((ULONG)pOld[slLineAbove] + (ULONG)pOld[slLineBelow] + (ULONG)pOld[slLineLeft] + (ULONG)pOld[slLineRight] )>>2) + (ULONG)pOld[pixOffset] )>>1; pNew[slLineBelow] = ulNew - (ulNew >> slDensity); slLineAbove++; slLineBelow++; slLineLeft++; slLineRight++; pixOffset++; } // lower horizontal border (without corners) slLineAbove = ((_pixBufferHeight-2)*_pixBufferWidth) + 1; slLineBelow = 1; slLineLeft = (_pixBufferHeight-1)*_pixBufferWidth; slLineRight = ((_pixBufferHeight-1)*_pixBufferWidth) + 2; pixOffset = ((_pixBufferHeight-1)*_pixBufferWidth) + 1; for( pixU=_pixBufferWidth-2; pixU>0; pixU--) { ulNew = ((((ULONG)pOld[slLineAbove] + (ULONG)pOld[slLineBelow] + (ULONG)pOld[slLineLeft] + (ULONG)pOld[slLineRight] )>>2) + (ULONG)pOld[pixOffset] )>>1; pNew[slLineBelow] = ulNew - (ulNew >> slDensity); slLineAbove++; slLineBelow++; slLineLeft++; slLineRight++; pixOffset++; } // corner ( 0, 0) ulNew = ((((ULONG)pOld[_pixBufferWidth] + (ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] + (ULONG)pOld[1] + (ULONG)pOld[_pixBufferWidth-1] )>>2) + (ULONG)pOld[0] )>>1; pNew[_pixBufferWidth] = ulNew - (ulNew >> slDensity); // corner ( 0, _pixBufferWidth) ulNew = ((((ULONG)pOld[(2*_pixBufferWidth) - 1] + (ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] + (ULONG)pOld[0] + (ULONG)pOld[_pixBufferWidth-2] )>>2) + (ULONG)pOld[_pixBufferWidth-1] )>>1; pNew[(2*_pixBufferWidth) - 1] = ulNew - (ulNew >> slDensity); // corner ( _pixBufferHeight, 0) ulNew = ((((ULONG)pOld[0] + (ULONG)pOld[(_pixBufferHeight-2)*_pixBufferWidth] + (ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) + 1] + (ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] )>>2) + (ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] )>>1; pNew[0] = ulNew - (ulNew >> slDensity); // corner ( _pixBufferHeight, _pixBufferWidth) ulNew = ((((ULONG)pOld[_pixBufferWidth-1] + (ULONG)pOld[((_pixBufferHeight-1)*_pixBufferWidth) - 1] + (ULONG)pOld[(_pixBufferHeight-1)*_pixBufferWidth] + (ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 2] )>>2) + (ULONG)pOld[(_pixBufferHeight*_pixBufferWidth) - 1] )>>1; pNew[_pixBufferWidth-1] = ulNew - (ulNew >> slDensity); } } // swap buffers Swap( _ptdEffect->td_pubBuffer1, _ptdEffect->td_pubBuffer2); _sfStats.StopTimer(CStatForm::STI_EFFECTRENDER); } /******************************* Fire Animation ********************************/ static void AnimateFire( SLONG slDensity) { // _sfStats.StartTimer(CStatForm::STI_EFFECTRENDER); /////////////////////////////////// move fire // use only one buffer (otherwise it's not working) UBYTE *pubNew = (UBYTE*)_ptdEffect->td_pubBuffer2; SLONG slBufferMask = _pixBufferWidth*_pixBufferHeight -1; #if ASMOPT == 1 SLONG slColumnModulo = _pixBufferWidth*(_pixBufferHeight-2) -1; #if (defined __MSVC_INLINE__) __asm { push ebx mov edi,D [ulRNDSeed] ;// EDI = randomizer mov esi,D [pubNew] xor ebx,ebx colLoopFM: mov ecx,D [_pixBufferHeight] sub ecx,2 rowLoopFM: mov edx,D [_pixBufferWidth] add edx,esi movzx eax,B [ebx+ edx] add edx,D [_pixBufferWidth] movzx edx,B [ebx+ edx] add eax,edx shr eax,1 cmp eax,D [slDensity] jg doCalc mov B [esi+ebx],0 jmp pixDone doCalc: mov edx,edi sar edx,16 and edx,D [slDensity] sub eax,edx movsx edx,B [asbMod3Sub1Table +edx] add edx,ebx and edx,D [slBufferMask] mov B [esi+edx],al imul edi,262147 pixDone: // advance to next row add ebx,D [_pixBufferWidth] dec ecx jnz rowLoopFM // advance to next column sub ebx,D [slColumnModulo] cmp ebx,D [_pixBufferWidth] jl colLoopFM // all done mov D [ulRNDSeed],edi pop ebx } #elif (defined __GNU_INLINE_X86_32__) __asm__ __volatile__ ( "movl %[slColumnModulo], %%edx \n\t" "movl %[slBufferMask], %%ecx \n\t" "movl %[slDensity], %%eax \n\t" "movl (" ASMSYM(ulRNDSeed) "), %%edi \n\t" "pushl %%ebx \n\t" // GCC's register. "xorl %%ebx, %%ebx \n\t" "pushl %%edx \n\t" // slColumnModulo "pushl %%ecx \n\t" // slBufferMask "pushl %%eax \n\t" // slDensity "0: \n\t" // colLoopFM "movl (" ASMSYM(_pixBufferHeight) "), %%ecx \n\t" "subl $2, %%ecx \n\t" "1: \n\t" // rowLoopFM "movl (" ASMSYM(_pixBufferWidth) "), %%edx \n\t" "addl %[pubNew], %%edx \n\t" "movzbl (%%ebx, %%edx), %%eax \n\t" "addl (" ASMSYM(_pixBufferWidth) "), %%edx \n\t" "movzbl (%%ebx, %%edx), %%edx \n\t" "addl %%edx, %%eax \n\t" "shrl $1, %%eax \n\t" "cmpl (%%esp), %%eax \n\t" "jg doCalc_animateFire \n\t" "movb $0, (%[pubNew], %%ebx) \n\t" "jmp pixDone_animateFire \n\t" "doCalc_animateFire: \n\t" "movl %%edi, %%edx \n\t" "sarl $16, %%edx \n\t" "andl (%%esp), %%edx \n\t" "subl %%edx, %%eax \n\t" "movsbl " ASMSYM(asbMod3Sub1Table) "(%%edx), %%edx \n\t" "addl %%ebx, %%edx \n\t" "andl 4(%%esp), %%edx \n\t" // slBufferMask "movb %%al, (%[pubNew], %%edx) \n\t" "imull $262147, %%edi \n\t" "pixDone_animateFire: \n\t" // advance to next row "addl (" ASMSYM(_pixBufferWidth) "), %%ebx \n\t" "decl %%ecx \n\t" "jnz 1b \n\t" // rowLoopFM // advance to next column "subl 8(%%esp), %%ebx \n\t" // slColumnModulo "cmpl (" ASMSYM(_pixBufferWidth) "), %%ebx \n\t" "jl 0b \n\t" // colLoopFM // all done "movl %%edi, (" ASMSYM(ulRNDSeed) ") \n\t" "addl $12, %%esp \n\t" // lose our locals. "popl %%ebx \n\t" // Restore GCC's var. : // no outputs. : [slBufferMask] "g" (slBufferMask), [slColumnModulo] "g" (slColumnModulo), [pubNew] "r" (pubNew), [slDensity] "g" (slDensity) : "eax", "ecx", "edx", "edi", "cc", "memory" ); #else #error fill in for you platform. #endif #else // inner rectangle (without 1 pixel border) for( PIX pixU=0; pixU<_pixBufferWidth; pixU++) { SLONG slOffset = pixU; for( PIX pixV=1; pixV<_pixBufferHeight-1; pixV++) { ULONG ulNew = ((ULONG)pubNew[_pixBufferWidth+slOffset] + (ULONG)pubNew[_pixBufferWidth*2+slOffset]) >>1; if( ulNew>slDensity) { ULONG ulNewDensity = RNDW&slDensity; ulNew -= ulNewDensity; SLONG slDifusion = (SLONG)asbMod3Sub1Table[ulNewDensity]; // (SLONG)(ulNewDensity%3-1); SLONG slPos = (slDifusion+slOffset) & slBufferMask; pubNew[slPos] = ulNew; } else { pubNew[slOffset] = 0; } slOffset += _pixBufferWidth; } } #endif // _sfStats.StopTimer(CStatForm::STI_EFFECTRENDER); } //////////////////////////// displace texture UBYTE *_pubHeat_RenderPlasmaFire = NULL; static void RenderPlasmaFire(void) { // _sfStats.StartTimer(CStatForm::STI_EFFECTRENDER); // get and adjust textures' parameters PIX pixBaseWidth = _ptdBase->GetPixWidth(); ULONG *pulTextureBase = _ptdBase->td_pulFrames; ULONG *pulTexture = _ptdEffect->td_pulFrames; ASSERT( _ptdEffect->td_pulFrames!=NULL && _ptdBase->td_pulFrames!=NULL && pixBaseWidth<=256); UBYTE *pubHeat = (UBYTE*)_ptdEffect->td_pubBuffer2; // heat map pointer SLONG slHeatMapStep = _pixBufferWidth/_pixTexWidth; SLONG slHeatRowStep = (slHeatMapStep-1)*_pixBufferWidth; SLONG slBaseMipShift = 8 - FastLog2(pixBaseWidth); #if ASMOPT == 1 #if (defined __MSVC_INLINE__) __asm { push ebx mov ebx,D [pubHeat] mov esi,D [pulTextureBase] mov edi,D [pulTexture] mov ecx,D [_pixTexHeight] rowLoopF: push ecx mov edx,D [_pixTexWidth] mov ecx,D [slBaseMipShift] pixLoopF: movzx eax,B [ebx] shr eax,cl mov eax,D [esi+ eax*4] mov D [edi],eax // advance to next pixel add ebx,D [slHeatMapStep] add edi,4 dec edx jnz pixLoopF // advance to next row pop ecx add ebx,D [slHeatRowStep] dec ecx jnz rowLoopF pop ebx } #elif (defined __GNU_INLINE_X86_32__) _pubHeat_RenderPlasmaFire = pubHeat; // ran out of registers. :/ __asm__ __volatile__ ( "movl %[slHeatRowStep], %%eax \n\t" "movl %[slHeatMapStep], %%edx \n\t" "movl %[slBaseMipShift], %%ecx \n\t" "movl %[pulTextureBase], %%esi \n\t" "movl %[pulTexture], %%edi \n\t" "pushl %%ebx \n\t" "movl (" ASMSYM(_pubHeat_RenderPlasmaFire) "),%%ebx \n\t" "pushl %%eax \n\t" // slHeatRowStep "pushl %%edx \n\t" // slHeatMapStep "pushl %%ecx \n\t" // slBaseMipShift "movl (" ASMSYM(_pixTexHeight) "), %%ecx \n\t" "0: \n\t" // rowLoopF "pushl %%ecx \n\t" "movl (" ASMSYM(_pixTexWidth) "), %%edx \n\t" "movl 4(%%esp), %%ecx \n\t" // slBaseMipShift "1: \n\t" // pixLoopF "movzbl (%%ebx), %%eax \n\t" "shrl %%cl, %%eax \n\t" "movl (%%esi, %%eax, 4), %%eax \n\t" "movl %%eax, (%%edi) \n\t" // advance to next pixel "addl 8(%%esp), %%ebx \n\t" // slHeatMapStep "addl $4, %%edi \n\t" "decl %%edx \n\t" "jnz 1b \n\t" // pixLoopF // advance to next row "popl %%ecx \n\t" "addl 8(%%esp), %%ebx \n\t" // slHeatRowStep "decl %%ecx \n\t" "jnz 0b \n\t" // rowLoopF "addl $12, %%esp \n\t" // lose our locals. "popl %%ebx \n\t" // restore GCC's register. : // no outputs. : [pulTextureBase] "g" (pulTextureBase), [pulTexture] "g" (pulTexture), [slBaseMipShift] "g" (slBaseMipShift), [slHeatRowStep] "g" (slHeatRowStep), [slHeatMapStep] "g" (slHeatMapStep) : "eax", "ecx", "edx", "esi", "edi", "cc", "memory" ); #else #error fill in for you platform. #endif #else INDEX iPalette; for( INDEX pixV=0; pixV<_pixTexHeight; pixV++) { // for every pixel in horizontal line for( INDEX pixU=0; pixU<_pixTexWidth; pixU++) { iPalette = (*pubHeat)>>slBaseMipShift; *pulTexture++ = pulTextureBase[iPalette]; pubHeat += slHeatMapStep; } pubHeat += slHeatRowStep; } #endif // _sfStats.StopTimer(CStatForm::STI_EFFECTRENDER); } ///////////////////////////////////////////////////////////////////// // EFFECT TABLES ///////////////////////////////////////////////////////////////////// struct TextureEffectSourceType atestWater[] = { { "Raindrops", InitializeRaindropsStandard, AnimateRaindropsStandard }, { "RaindropsBig", InitializeRaindropsBig, AnimateRaindropsBig }, { "RaindropsSmall", InitializeRaindropsSmall, AnimateRaindropsSmall }, { "Random Surfer", InitializeRandomSurfer, AnimateRandomSurfer }, { "Oscilator", InitializeOscilator, AnimateOscilator }, { "Vertical Line", InitializeVertLine, AnimateVertLine }, { "Horizontal Line", InitializeHortLine, AnimateHortLine }, }; struct TextureEffectSourceType atestFire[] = { { "Point", InitializeFirePoint, AnimateFirePoint }, { "Random Point", InitializeRandomFirePoint, AnimateRandomFirePoint }, { "Shake Point", InitializeFireShakePoint, AnimateFireShakePoint }, { "Fire Place", InitializeFirePlace, AnimateFirePlace }, { "Roler", InitializeFireRoler, AnimateFireRoler }, { "Fall", InitializeFireFall, AnimateFireFall }, { "Fountain", InitializeFireFountain, AnimateFireFountain }, { "Side Fountain", InitializeFireSideFountain, AnimateFireSideFountain }, { "Lightning", InitializeFireLightning, AnimateFireLightning }, { "Lightning Ball", InitializeFireLightningBall, AnimateFireLightningBall }, { "Smoke", InitializeFireSmoke, AnimateFireSmoke }, }; inline void AWaterFast(void) { AnimateWater(2); }; inline void AWaterMedium(void) { AnimateWater(3); }; inline void AWaterSlow(void) { AnimateWater(5); }; inline void APlasma(void) { AnimatePlasma(4, ptNormal); }; inline void APlasmaUp(void) { AnimatePlasma(4, ptUp); }; inline void APlasmaUpTile(void) { AnimatePlasma(4, ptUpTile); }; inline void APlasmaDown(void) { AnimatePlasma(5, ptDown); }; inline void APlasmaDownTile(void) { AnimatePlasma(5, ptDownTile); }; inline void APlasmaUpSlow(void) { AnimatePlasma(6, ptUp); }; inline void AFire(void) { AnimateFire(15); }; struct TextureEffectGlobalType _ategtTextureEffectGlobalPresets[] = { { "Water Fast", InitializeWater, AWaterFast, sizeof(atestWater)/sizeof(atestWater[0]), atestWater }, { "Water Medium", InitializeWater, AWaterMedium, sizeof(atestWater)/sizeof(atestWater[0]), atestWater }, { "Water Slow", InitializeWater, AWaterSlow, sizeof(atestWater)/sizeof(atestWater[0]), atestWater }, { "", InitializeWater, AWaterSlow, sizeof(atestWater)/sizeof(atestWater[0]), atestWater }, { "Plasma Tile", InitializeFire, APlasma, sizeof(atestFire)/sizeof(atestFire[0]), atestFire }, { "Plasma Up", InitializeFire, APlasmaUp, sizeof(atestFire)/sizeof(atestFire[0]), atestFire }, { "Plasma Up Tile", InitializeFire, APlasmaUpTile, sizeof(atestFire)/sizeof(atestFire[0]), atestFire }, { "Plasma Down", InitializeFire, APlasmaDown, sizeof(atestFire)/sizeof(atestFire[0]), atestFire }, { "Plasma Down Tile", InitializeFire, APlasmaDownTile, sizeof(atestFire)/sizeof(atestFire[0]), atestFire }, { "Plasma Up Slow", InitializeFire, APlasmaUpSlow, sizeof(atestFire)/sizeof(atestFire[0]), atestFire }, { "Fire", InitializeFire, AFire, sizeof(atestFire)/sizeof(atestFire[0]), atestFire }, }; INDEX _ctTextureEffectGlobalPresets = sizeof(_ategtTextureEffectGlobalPresets) / sizeof(_ategtTextureEffectGlobalPresets[0]); // get effect type (TRUE if water type effect, FALSE if plasma or fire effect) BOOL CTextureEffectGlobal::IsWater(void) { return( _ategtTextureEffectGlobalPresets[teg_ulEffectType].tegt_Initialize == InitializeWater); } // default constructor CTextureEffectGlobal::CTextureEffectGlobal(CTextureData *ptdTexture, ULONG ulGlobalEffect) { // remember global effect's texture data for cross linking teg_ptdTexture = ptdTexture; teg_ulEffectType = ulGlobalEffect; // init for animating _ategtTextureEffectGlobalPresets[teg_ulEffectType].tegt_Initialize(); // make sure the texture will be updated next time when used teg_updTexture.Invalidate(); } // add new effect source. void CTextureEffectGlobal::AddEffectSource( ULONG ulEffectSourceType, PIX pixU0, PIX pixV0, PIX pixU1, PIX pixV1) { CTextureEffectSource* ptesNew = teg_atesEffectSources.New(1); ptesNew->Initialize(this, ulEffectSourceType, pixU0, pixV0, pixU1, pixV1); } // animate effect texture void CTextureEffectGlobal::Animate(void) { // if not set yet (funny word construction:) if( !bTableSet) { // set table for fast modulo 3 minus 1 for( INDEX i=0; i<256; i++) asbMod3Sub1Table[i]=(SBYTE)((i%3)-1); bTableSet = TRUE; } // setup some internal vars _ptdEffect = teg_ptdTexture; _pixBufferWidth = _ptdEffect->td_pixBufferWidth; _pixBufferHeight = _ptdEffect->td_pixBufferHeight; _ulBufferMask = _pixBufferHeight*_pixBufferWidth -1; // remember buffer pointers _pubDrawBuffer=(UBYTE*)_ptdEffect->td_pubBuffer2; _pswDrawBuffer=(SWORD*)_ptdEffect->td_pubBuffer2; // for each effect source FOREACHINDYNAMICARRAY( teg_atesEffectSources, CTextureEffectSource, itEffectSource) { // let it animate itself itEffectSource->Animate(); } // use animation function for this global effect type _ategtTextureEffectGlobalPresets[teg_ulEffectType].tegt_Animate(); // remember that it was calculated teg_updTexture.MarkUpdated(); } #pragma warning(disable: 4731) // render effect texture void CTextureEffectGlobal::Render( INDEX iWantedMipLevel, PIX pixTexWidth, PIX pixTexHeight) { // setup some internal vars _ptdEffect = teg_ptdTexture; _ptdBase = teg_ptdTexture->td_ptdBaseTexture; _pixBufferWidth = _ptdEffect->td_pixBufferWidth; _pixBufferHeight = _ptdEffect->td_pixBufferHeight; if( IsWater()) { // use water rendering routine _pixTexWidth = pixTexWidth; _pixTexHeight = pixTexHeight; _iWantedMipLevel = iWantedMipLevel; RenderWater(); } else { // use plasma & fire rendering routine _pixTexWidth = _ptdEffect->GetWidth() >>iWantedMipLevel; _pixTexHeight = _ptdEffect->GetHeight() >>iWantedMipLevel; RenderPlasmaFire(); } } #pragma warning(default: 4731) // returns number of second it took to render effect texture DOUBLE CTextureEffectGlobal::GetRenderingTime(void) { return( _sfStats.sf_astTimers[CStatForm::STI_EFFECTRENDER].st_tvElapsed.GetSeconds()); }