mirror of
https://github.com/ptitSeb/Serious-Engine
synced 2025-01-27 04:40:58 +01:00
fix Graphics asm
same stuff as in previous commit
This commit is contained in:
parent
a1b84b3ba0
commit
b1837c2e58
|
@ -426,7 +426,7 @@ COLOR MulColors( COLOR col1, COLOR col2)
|
|||
"orl %%eax, %%ebx \n\t"
|
||||
"movl %%ebx, %%ecx \n\t"
|
||||
"popl %%ebx \n\t"
|
||||
: "=c" (colRet)
|
||||
: "=&c" (colRet)
|
||||
: "S" (col1), "D" (col2)
|
||||
: "eax", "edx", "cc", "memory"
|
||||
);
|
||||
|
@ -536,18 +536,18 @@ COLOR AddColors( COLOR col1, COLOR col2)
|
|||
}
|
||||
|
||||
#elif (defined __GNU_INLINE__)
|
||||
ULONG tmp;
|
||||
__asm__ __volatile__ (
|
||||
"pushl %%ebx \n\t"
|
||||
"pushl %%edi \n\t"
|
||||
"pushl %%esi \n\t"
|
||||
"xorl %%ebx, %%ebx \n\t"
|
||||
// if xbx is "r", gcc runs out of regs in -fPIC + -fno-omit-fp :(
|
||||
//"xorl %[xbx], %[xbx] \n\t"
|
||||
"movl $0, %[xbx] \n\t"
|
||||
"mov $255, %%esi \n\t"
|
||||
|
||||
// red
|
||||
"movl (%%esp), %%eax \n\t"
|
||||
"movl %[col1], %%eax \n\t"
|
||||
"andl $0xFF000000, %%eax \n\t"
|
||||
"shrl $24, %%eax \n\t"
|
||||
"movl 4(%%esp), %%edx \n\t"
|
||||
"movl %[col2], %%edx \n\t"
|
||||
"andl $0xFF000000, %%edx \n\t"
|
||||
"shrl $24, %%edx \n\t"
|
||||
"addl %%edx, %%eax \n\t"
|
||||
|
@ -556,13 +556,13 @@ COLOR AddColors( COLOR col1, COLOR col2)
|
|||
"orl %%ecx, %%eax \n\t"
|
||||
"shll $24, %%eax \n\t"
|
||||
"andl $0xFF000000, %%eax \n\t"
|
||||
"orl %%eax, %%ebx \n\t"
|
||||
"orl %%eax, %[xbx] \n\t"
|
||||
|
||||
// green
|
||||
"movl (%%esp), %%eax \n\t"
|
||||
"movl %[col1], %%eax \n\t"
|
||||
"andl $0x00FF0000, %%eax \n\t"
|
||||
"shrl $16, %%eax \n\t"
|
||||
"movl 4(%%esp), %%edx \n\t"
|
||||
"movl %[col2], %%edx \n\t"
|
||||
"andl $0x00FF0000, %%edx \n\t"
|
||||
"shrl $16, %%edx \n\t"
|
||||
"addl %%edx, %%eax \n\t"
|
||||
|
@ -571,13 +571,13 @@ COLOR AddColors( COLOR col1, COLOR col2)
|
|||
"orl %%ecx, %%eax \n\t"
|
||||
"shll $16, %%eax \n\t"
|
||||
"andl $0x00FF0000, %%eax \n\t"
|
||||
"orl %%eax, %%ebx \n\t"
|
||||
"orl %%eax, %[xbx] \n\t"
|
||||
|
||||
// blue
|
||||
"movl (%%esp), %%eax \n\t"
|
||||
"movl %[col1], %%eax \n\t"
|
||||
"andl $0x0000FF00, %%eax \n\t"
|
||||
"shrl $8, %%eax \n\t"
|
||||
"movl 4(%%esp), %%edx \n\t"
|
||||
"movl %[col2], %%edx \n\t"
|
||||
"andl $0x0000FF00, %%edx \n\t"
|
||||
"shrl $8, %%edx \n\t"
|
||||
"addl %%edx, %%eax \n\t"
|
||||
|
@ -586,13 +586,13 @@ COLOR AddColors( COLOR col1, COLOR col2)
|
|||
"orl %%ecx, %%eax \n\t"
|
||||
"shll $8, %%eax \n\t"
|
||||
"andl $0x0000FF00, %%eax \n\t"
|
||||
"orl %%eax, %%ebx \n\t"
|
||||
"orl %%eax, %[xbx] \n\t"
|
||||
|
||||
// alpha
|
||||
"movl (%%esp), %%eax \n\t"
|
||||
"movl %[col1], %%eax \n\t"
|
||||
"andl $0x000000FF, %%eax \n\t"
|
||||
"shrl $0, %%eax \n\t"
|
||||
"movl 4(%%esp), %%edx \n\t"
|
||||
"movl %[col2], %%edx \n\t"
|
||||
"andl $0x000000FF, %%edx \n\t"
|
||||
"shrl $0, %%edx \n\t"
|
||||
"addl %%edx, %%eax \n\t"
|
||||
|
@ -601,15 +601,10 @@ COLOR AddColors( COLOR col1, COLOR col2)
|
|||
"orl %%ecx, %%eax \n\t"
|
||||
"shll $0, %%eax \n\t"
|
||||
"andl $0x000000FF, %%eax \n\t"
|
||||
"orl %%eax, %%ebx \n\t"
|
||||
"movl %%ebx, %%ecx \n\t"
|
||||
|
||||
// done.
|
||||
"addl $8, %%esp \n\t"
|
||||
"popl %%ebx \n\t"
|
||||
: "=c" (colRet)
|
||||
: "S" (col1), "D" (col2)
|
||||
: "eax", "edx", "cc", "memory"
|
||||
"orl %[xbx], %%eax \n\t"
|
||||
: "=&a" (colRet), [xbx] "=&g" (tmp)
|
||||
: [col1] "g" (col1), [col2] "g" (col2)
|
||||
: "ecx", "edx", "esi", "cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
|
|
@ -332,7 +332,7 @@ inline void CopyLongs( ULONG *pulSrc, ULONG *pulDst, INDEX ctLongs)
|
|||
"cld \n\t"
|
||||
"rep \n\t"
|
||||
"movsd \n\t"
|
||||
: // no outputs.
|
||||
: "=S" (pulSrc), "=D" (pulDst), "=c" (ctLongs)
|
||||
: "S" (pulSrc), "D" (pulDst), "c" (ctLongs)
|
||||
: "cc", "memory"
|
||||
);
|
||||
|
@ -364,7 +364,7 @@ inline void StoreLongs( ULONG ulVal, ULONG *pulDst, INDEX ctLongs)
|
|||
"cld \n\t"
|
||||
"rep \n\t"
|
||||
"stosd \n\t"
|
||||
: // no outputs.
|
||||
: "=D" (pulDst), "=c" (ctLongs)
|
||||
: "a" (ulVal), "D" (pulDst), "c" (ctLongs)
|
||||
: "cc", "memory"
|
||||
);
|
||||
|
|
|
@ -186,10 +186,10 @@ elemDone:
|
|||
}
|
||||
#elif (defined __GNU_INLINE__)
|
||||
__asm__ __volatile__ (
|
||||
"pushl %%ebx \n\t" // Save GCC's register.
|
||||
"movl %%eax, %%ebx \n\t"
|
||||
|
||||
"movd %%ebx, %%mm1 \n\t"
|
||||
"movl %[ctElems], %%ecx \n\t"
|
||||
"movl %[piDst], %%edi \n\t"
|
||||
"movl %[piElements], %%esi \n\t"
|
||||
"movd %[iVtx0Pass], %%mm1 \n\t"
|
||||
"movq %%mm1, %%mm0 \n\t"
|
||||
"psllq $32, %%mm1 \n\t"
|
||||
"por %%mm0, %%mm1 \n\t"
|
||||
|
@ -205,17 +205,18 @@ elemDone:
|
|||
"jnz 0b \n\t" // elemLoop
|
||||
"1: \n\t" // elemRest
|
||||
"emms \n\t"
|
||||
"testl $1, %%edx \n\t"
|
||||
"testl $1, %[ctElems] \n\t"
|
||||
"jz 2f \n\t" // elemDone
|
||||
"movl (%%esi), %%eax \n\t"
|
||||
"addl %%ebx, %%eax \n\t"
|
||||
"addl %[iVtx0Pass], %%eax \n\t"
|
||||
"movl %%eax, (%%edi) \n\t"
|
||||
"2: \n\t" // elemDone
|
||||
"popl %%ebx \n\t" // restore GCC's register.
|
||||
: // no outputs.
|
||||
: "c" (ctElems), "d" (ctElems), "D" (piDst),
|
||||
"S" (pspo->spo_piElements), "a" (pspo->spo_iVtx0Pass)
|
||||
: "cc", "memory"
|
||||
: [ctElems] "g" (ctElems), [piDst] "g" (piDst),
|
||||
[piElements] "g" (pspo->spo_piElements),
|
||||
[iVtx0Pass] "g" (pspo->spo_iVtx0Pass)
|
||||
: FPU_REGS, "mm0", "mm1", "eax", "ecx", "esi", "edi",
|
||||
"cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
@ -506,12 +507,13 @@ static void RSBinToGroups( ScenePolygon *pspoFirst)
|
|||
|
||||
#elif (defined __GNU_INLINE__)
|
||||
__asm__ __volatile__ (
|
||||
"movl $2, %%eax \n\t"
|
||||
"bsrl (%%esi), %%ecx \n\t"
|
||||
"shll %%cl, %%eax \n\t"
|
||||
"movl %%eax, (%%esi) \n\t"
|
||||
: // no outputs.
|
||||
: "a" (2), "S" (&_ctGroupsCount)
|
||||
: "ecx", "cc", "memory"
|
||||
: "S" (&_ctGroupsCount)
|
||||
: "eax", "ecx", "cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
|
|
@ -97,6 +97,8 @@ pixLoop:
|
|||
|
||||
#elif (defined __GNU_INLINE__)
|
||||
__asm__ __volatile__ (
|
||||
"movl %[pubTexture], %%esi \n\t"
|
||||
"movl %[pixTextureSize], %%ecx \n\t"
|
||||
"leal 0(%%esi, %%ecx), %%edi \n\t"
|
||||
"0: \n\t" // pixLoop
|
||||
"movzbl (%%esi), %%eax \n\t"
|
||||
|
@ -108,8 +110,9 @@ pixLoop:
|
|||
"decl %%ecx \n\t"
|
||||
"jnz 0b \n\t" // pixLoop
|
||||
: // no outputs.
|
||||
: "S" (pubTexture), "D" (pubTexture), "c" (pixTextureSize)
|
||||
: "eax", "cc", "memory"
|
||||
: [pubTexture] "g" (pubTexture),
|
||||
[pixTextureSize] "g" (pixTextureSize)
|
||||
: "eax", "ecx", "esi", "edi", "cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
|
|
@ -219,6 +219,9 @@ void UploadTexture_OGL( ULONG *pulTexture, PIX pixSizeU, PIX pixSizeV,
|
|||
#elif (defined __GNU_INLINE__)
|
||||
__asm__ __volatile__ (
|
||||
"pxor %%mm0,%%mm0 \n\t"
|
||||
"movl %[pulSrc],%%esi \n\t"
|
||||
"movl %[pulDst],%%edi \n\t"
|
||||
"movl %[pixSize],%%ecx \n\t"
|
||||
"0: \n\t" // pixLoop
|
||||
"movd 0(%%esi), %%mm1 \n\t"
|
||||
"movd 4(%%esi), %%mm2 \n\t"
|
||||
|
@ -234,8 +237,10 @@ void UploadTexture_OGL( ULONG *pulTexture, PIX pixSizeU, PIX pixSizeV,
|
|||
"jnz 0b \n\t" // pixLoop
|
||||
"emms \n\t"
|
||||
:
|
||||
: "S" (pulSrc), "D" (pulDst), "c" (pixSize)
|
||||
: "memory", "cc"
|
||||
: [pulSrc] "g" (pulSrc), [pulDst] "g" (pulDst),
|
||||
[pixSize] "g" (pixSize)
|
||||
: FPU_REGS, "mm0", "mm1", "mm2",
|
||||
"ecx", "esi", "edi", "memory", "cc"
|
||||
);
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,12 +13,6 @@ You should have received a copy of the GNU General Public License along
|
|||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
|
||||
|
||||
// !!! FIXME: One of the GNU inline asm blocks has a bug that causes the
|
||||
// !!! FIXME: title on the main menu to render incorrectly. (Generating an
|
||||
// !!! FIXME: incorrect mipmap?) The Intel compiler works fine with the
|
||||
// !!! FIXME: MSVC inline asm, but GCC and Intel both have the problem when
|
||||
// !!! FIXME: using the GNU inline asm.
|
||||
|
||||
#include "Engine/StdH.h"
|
||||
|
||||
#include <Engine/Base/Statistics_Internal.h>
|
||||
|
@ -198,9 +192,9 @@ void FlipBitmap( UBYTE *pubSrc, UBYTE *pubDst, PIX pixWidth, PIX pixHeight, INDE
|
|||
|
||||
// makes a mipmap one level lower (bilinear or nearest-neighbour with border preservation)
|
||||
#if (defined __GNUC__)
|
||||
static __int64 mmRounder = 0x0002000200020002ll;
|
||||
__int64 mmRounder = 0x0002000200020002ll;
|
||||
#else
|
||||
static __int64 mmRounder = 0x0002000200020002;
|
||||
__int64 mmRounder = 0x0002000200020002;
|
||||
#endif
|
||||
|
||||
static void MakeOneMipmap( ULONG *pulSrcMipmap, ULONG *pulDstMipmap, PIX pixWidth, PIX pixHeight, BOOL bBilinear)
|
||||
|
@ -305,19 +299,19 @@ pixLoopN:
|
|||
|
||||
#elif (defined __GNU_INLINE__)
|
||||
__asm__ __volatile__ (
|
||||
"pushl %%ebx \n\t" // Save GCC's register.
|
||||
"movl %%ecx, %%ebx \n\t"
|
||||
|
||||
"pxor %%mm0, %%mm0 \n\t"
|
||||
"movl %[pulSrcMipmap], %%esi \n\t"
|
||||
"movl %[pulDstMipmap], %%edi \n\t"
|
||||
"movl %[pixHeight], %%edx \n\t"
|
||||
|
||||
"0: \n\t" // rowLoop
|
||||
"movl %%ebx, %%ecx \n\t"
|
||||
"movl %[pixWidth], %%ecx \n\t"
|
||||
|
||||
"1: \n\t" // pixLoopN
|
||||
"movd 0(%%esi), %%mm1 \n\t" // up-left
|
||||
"movd 4(%%esi), %%mm2 \n\t" // up-right
|
||||
"movd 0(%%esi, %%ebx, 8), %%mm3 \n\t" // down-left
|
||||
"movd 4(%%esi, %%ebx, 8), %%mm4 \n\t" // down-right
|
||||
"movd 0(%%esi, %[pixWidth], 8), %%mm3 \n\t" // down-left
|
||||
"movd 4(%%esi, %[pixWidth], 8), %%mm4 \n\t" // down-right
|
||||
"punpcklbw %%mm0, %%mm1 \n\t"
|
||||
"punpcklbw %%mm0, %%mm2 \n\t"
|
||||
"punpcklbw %%mm0, %%mm3 \n\t"
|
||||
|
@ -325,7 +319,7 @@ pixLoopN:
|
|||
"paddw %%mm2, %%mm1 \n\t"
|
||||
"paddw %%mm3, %%mm1 \n\t"
|
||||
"paddw %%mm4, %%mm1 \n\t"
|
||||
"paddw (%%eax), %%mm1 \n\t"
|
||||
"paddw (" ASMSYM(mmRounder) "), %%mm1 \n\t"
|
||||
"psrlw $2, %%mm1 \n\t"
|
||||
"packuswb %%mm0, %%mm1 \n\t"
|
||||
"movd %%mm1, (%%edi) \n\t"
|
||||
|
@ -338,15 +332,17 @@ pixLoopN:
|
|||
|
||||
// advance to next row
|
||||
// skip one row in source mip-map
|
||||
"leal 0(%%esi, %%ebx, 8), %%esi \n\t"
|
||||
"leal 0(%%esi, %[pixWidth], 8), %%esi \n\t"
|
||||
"decl %%edx \n\t"
|
||||
"jnz 0b \n\t" // rowLoop
|
||||
"popl %%ebx \n\t" // restore GCC's register.
|
||||
"emms \n\t"
|
||||
: // no outputs.
|
||||
: "a" (&mmRounder), "c" (pixWidth), "S" (pulSrcMipmap),
|
||||
"D" (pulDstMipmap), "d" (pixHeight)
|
||||
: "cc", "memory"
|
||||
: [pixWidth] "r" (pixWidth),
|
||||
[pulSrcMipmap] "g" (pulSrcMipmap),
|
||||
[pulDstMipmap] "g" (pulDstMipmap),
|
||||
[pixHeight] "g" (pixHeight)
|
||||
: FPU_REGS, MMX_REGS, "ecx", "edx", "esi", "edi",
|
||||
"cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
@ -433,23 +429,22 @@ fullEnd:
|
|||
}
|
||||
|
||||
#elif (defined __GNU_INLINE__)
|
||||
ULONG tmp, tmp2;
|
||||
__asm__ __volatile__ (
|
||||
"pushl %%ebx \n\t" // Save GCC's register.
|
||||
"movl %%ecx, %%ebx \n\t"
|
||||
|
||||
"xorl %[xbx], %[xbx] \n\t"
|
||||
"movl %[pulSrcMipmap], %%esi \n\t"
|
||||
"movl %[pulDstMipmap], %%edi \n\t"
|
||||
// setup upper half
|
||||
"pushl %%edx \n\t" // pixHeight
|
||||
"pushl %%eax \n\t" // ulRowModulo
|
||||
"pushl %%ebx \n\t" // pixWidth
|
||||
"xorl %%ebx, %%ebx \n\t"
|
||||
"shrl $1, %%edx \n\t"
|
||||
"movl %[pixHeight], %%eax \n\t"
|
||||
"movl %%eax, %[xdx] \n\t"
|
||||
"shrl $1, %[xdx] \n\t"
|
||||
|
||||
"0: \n\t" // halfLoop
|
||||
"movl (%%esp), %%ecx \n\t"
|
||||
"movl %[pixWidth], %%ecx \n\t"
|
||||
"shrl $1, %%ecx \n\t"
|
||||
|
||||
"1: \n\t" // leftLoop
|
||||
"movl 0(%%esi, %%ebx, 8), %%eax \n\t" // upper-left (or lower-left)
|
||||
"movl 0(%%esi, %[xbx], 8), %%eax \n\t" // upper-left (or lower-left)
|
||||
"movl %%eax, (%%edi) \n\t"
|
||||
|
||||
// advance to next pixel
|
||||
|
@ -459,12 +454,12 @@ fullEnd:
|
|||
"jg 1b \n\t" // leftLoop
|
||||
|
||||
// do right row half
|
||||
"movl (%%esp), %%ecx \n\t"
|
||||
"movl %[pixWidth], %%ecx \n\t"
|
||||
"shrl $1, %%ecx \n\t"
|
||||
"jz 3f \n\t" // halfEnd
|
||||
|
||||
"2: \n\t" // rightLoop
|
||||
"movl 4(%%esi, %%ebx, 8), %%eax \n\t" // upper-right (or lower-right)
|
||||
"movl 4(%%esi, %[xbx], 8), %%eax \n\t" // upper-right (or lower-right)
|
||||
"movl %%eax, (%%edi) \n\t"
|
||||
|
||||
// advance to next pixel
|
||||
|
@ -475,25 +470,26 @@ fullEnd:
|
|||
|
||||
"3: \n\t" // halfEnd
|
||||
// advance to next row
|
||||
"addl 4(%%esp), %%esi \n\t" // skip one row in source mip-map
|
||||
"subl $1, %%edx \n\t"
|
||||
"addl %[ulRowModulo], %%esi \n\t" // skip one row in source mip-map
|
||||
"subl $1, %[xdx] \n\t"
|
||||
"jg 0b \n\t" // halfLoop
|
||||
|
||||
// do eventual lower half loop (if not yet done)
|
||||
"movl 8(%%esp), %%edx \n\t"
|
||||
"shrl $1, %%edx \n\t"
|
||||
"movl %[pixHeight], %%eax \n\t"
|
||||
"movl %%eax, %[xdx] \n\t"
|
||||
"shrl $1, %[xdx] \n\t"
|
||||
"jz 4f \n\t" // fullEnd
|
||||
"cmpl (%%esp), %%ebx \n\t"
|
||||
"movl (%%esp), %%ebx \n\t"
|
||||
"cmpl %[pixWidth], %[xbx] \n\t"
|
||||
"movl %[pixWidth], %[xbx] \n\t"
|
||||
"jne 0b \n\t" // halfLoop
|
||||
|
||||
"4: \n\t" // fullEnd
|
||||
"addl $12, %%esp \n\t"
|
||||
"popl %%ebx \n\t" // restore GCC's register.
|
||||
: // no outputs.
|
||||
: "S" (pulSrcMipmap), "D" (pulDstMipmap), "d" (pixHeight),
|
||||
"c" (pixWidth), "a" (ulRowModulo)
|
||||
: "cc", "memory"
|
||||
: [xbx] "=&r" (tmp), [xdx] "=&g" (tmp2)
|
||||
: [pulSrcMipmap] "g" (pulSrcMipmap),
|
||||
[pulDstMipmap] "g" (pulDstMipmap),
|
||||
[pixHeight] "g" (pixHeight), [pixWidth] "g" (pixWidth),
|
||||
[ulRowModulo] "g" (ulRowModulo)
|
||||
: "eax", "ecx", "esi", "edi", "cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
@ -857,26 +853,27 @@ nextRowO:
|
|||
}
|
||||
|
||||
#elif (defined __GNU_INLINE__)
|
||||
ULONG tmp;
|
||||
__asm__ __volatile__ (
|
||||
"movl %[pulSrc], %%esi \n\t"
|
||||
"movl %[pulDst], %%edi \n\t"
|
||||
// reset dither line offset
|
||||
"pushl %%ebx \n\t" // save GCC's register.
|
||||
"movl (" ASMSYM(pulDitherTable) "), %%ebx \n\t"
|
||||
"pushl %%ecx \n\t" // slModulo
|
||||
"pushl %%eax \n\t" // pixWidth
|
||||
"movl %[pixHeight], %%eax \n\t"
|
||||
"movl %%eax, %[xdx] \n\t"
|
||||
"xorl %%eax, %%eax \n\t"
|
||||
|
||||
"rowLoopO: \n\t"
|
||||
"0: \n\t" // rowLoopO
|
||||
// get horizontal dither patterns
|
||||
"movq 0(%%ebx, %%eax, 4), %%mm4 \n\t"
|
||||
"movq 8(%%ebx, %%eax, 4), %%mm5 \n\t"
|
||||
"movq 0(%[pulDitherTable], %%eax, 4), %%mm4 \n\t"
|
||||
"movq 8(%[pulDitherTable], %%eax, 4), %%mm5 \n\t"
|
||||
"psrlw (" ASMSYM(mmShifter) "), %%mm4 \n\t"
|
||||
"psrlw (" ASMSYM(mmShifter) "), %%mm5 \n\t"
|
||||
"pand (" ASMSYM(mmMask) "), %%mm4 \n\t"
|
||||
"pand (" ASMSYM(mmMask) "), %%mm5 \n\t"
|
||||
|
||||
// process row
|
||||
"movl (%%esp), %%ecx \n\t"
|
||||
"pixLoopO: \n\t"
|
||||
"movl %[pixWidth], %%ecx \n\t"
|
||||
"1: \n\t" // pixLoopO
|
||||
"movq 0(%%esi), %%mm1 \n\t"
|
||||
"movq 8(%%esi), %%mm2 \n\t"
|
||||
"paddusb %%mm4, %%mm1 \n\t"
|
||||
|
@ -888,30 +885,30 @@ nextRowO:
|
|||
"addl $16, %%esi \n\t"
|
||||
"addl $16, %%edi \n\t"
|
||||
"subl $4, %%ecx \n\t"
|
||||
"jg pixLoopO \n\t" // !!!! possible memory leak?
|
||||
"je nextRowO \n\t"
|
||||
"jg 1b \n\t" // !!!! possible memory leak?
|
||||
"je 2f \n\t" // nextRowO
|
||||
|
||||
// backup couple of pixels
|
||||
"leal 0(%%esi, %%ecx, 4), %%esi \n\t"
|
||||
"leal 0(%%edi, %%ecx, 4), %%edi \n\t"
|
||||
|
||||
"nextRowO: \n\t"
|
||||
"2: \n\t" // nextRowO
|
||||
// get next dither line patterns
|
||||
"addl 4(%%esp), %%esi \n\t"
|
||||
"addl 4(%%esp), %%edi \n\t"
|
||||
"addl %[slModulo], %%esi \n\t"
|
||||
"addl %[slModulo], %%edi \n\t"
|
||||
"addl $4, %%eax \n\t"
|
||||
"andl $15, %%eax \n\t"
|
||||
|
||||
// advance to next row
|
||||
"decl %%edx \n\t"
|
||||
"jnz rowLoopO \n\t"
|
||||
"decl %[xdx] \n\t"
|
||||
"jnz 0b \n\t" // rowLoopO
|
||||
"emms \n\t"
|
||||
"addl $8, %%esp \n\t"
|
||||
"popl %%ebx \n\t" // restore GCC's register.
|
||||
: // no outputs.
|
||||
: "S" (pulSrc), "D" (pulDst), "d" (pixHeight),
|
||||
"a" (pixWidth), "c" (slModulo)
|
||||
: "cc", "memory"
|
||||
: [xdx] "=&g" (tmp)
|
||||
: [pulSrc] "g" (pulSrc), [pulDst] "g" (pulDst),
|
||||
[pixHeight] "g" (pixHeight), [pixWidth] "g" (pixWidth),
|
||||
[slModulo] "g" (slModulo), [pulDitherTable] "r" (pulDitherTable)
|
||||
: FPU_REGS, MMX_REGS, "eax", "ecx", "esi", "edi",
|
||||
"cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
@ -1051,17 +1048,17 @@ allDoneE:
|
|||
|
||||
#elif (defined __GNU_INLINE__)
|
||||
__asm__ __volatile__ (
|
||||
"pushl %%ebx \n\t" // Save GCC's register.
|
||||
"movl %%ecx, %%ebx \n\t"
|
||||
"pxor %%mm0, %%mm0 \n\t"
|
||||
"movl %[pulDst], %%esi \n\t"
|
||||
"movl %[pixHeight], %%edx \n\t"
|
||||
"decl %%edx \n\t" // need not to dither last row
|
||||
|
||||
"rowLoopE: \n\t"
|
||||
"0: \n\t" // rowLoopE
|
||||
// left to right
|
||||
"movl %%eax, %%ecx \n\t"
|
||||
"movl %[pixWidth], %%ecx \n\t"
|
||||
"decl %%ecx \n\t"
|
||||
|
||||
"pixLoopEL: \n\t"
|
||||
"1: \n\t" // pixLoopEL
|
||||
"movd (%%esi), %%mm1 \n\t"
|
||||
"punpcklbw %%mm0, %%mm1 \n\t"
|
||||
"pand (" ASMSYM(mmErrDiffMask) "), %%mm1 \n\t"
|
||||
|
@ -1086,29 +1083,29 @@ allDoneE:
|
|||
|
||||
// spread errors
|
||||
"paddusb 4(%%esi), %%mm7 \n\t"
|
||||
"paddusb -4(%%esi, %%ebx, 4), %%mm3 \n\t"
|
||||
"paddusb 0(%%esi, %%ebx, 4), %%mm5 \n\t"
|
||||
"paddusb 4(%%esi, %%ebx, 4), %%mm1 \n\t" // !!!! possible memory leak?
|
||||
"paddusb -4(%%esi, %[pixCanvasWidth], 4), %%mm3 \n\t"
|
||||
"paddusb 0(%%esi, %[pixCanvasWidth], 4), %%mm5 \n\t"
|
||||
"paddusb 4(%%esi, %[pixCanvasWidth], 4), %%mm1 \n\t" // !!!! possible memory leak?
|
||||
"movd %%mm7, 4(%%esi) \n\t"
|
||||
"movd %%mm3, -4(%%esi, %%ebx, 4) \n\t"
|
||||
"movd %%mm5, 0(%%esi, %%ebx, 4) \n\t"
|
||||
"movd %%mm1, 4(%%esi, %%ebx, 4) \n\t"
|
||||
"movd %%mm3, -4(%%esi, %[pixCanvasWidth], 4) \n\t"
|
||||
"movd %%mm5, 0(%%esi, %[pixCanvasWidth], 4) \n\t"
|
||||
"movd %%mm1, 4(%%esi, %[pixCanvasWidth], 4) \n\t"
|
||||
|
||||
// advance to next pixel
|
||||
"addl $4, %%esi \n\t"
|
||||
"decl %%ecx \n\t"
|
||||
"jnz pixLoopEL \n\t"
|
||||
"jnz 1b \n\t" // pixLoopEL
|
||||
|
||||
// advance to next row
|
||||
"addl %%edi, %%esi \n\t"
|
||||
"addl %[slWidthModulo], %%esi \n\t"
|
||||
"decl %%edx \n\t"
|
||||
"jz allDoneE \n\t"
|
||||
"jz 3f \n\t" // allDoneE
|
||||
|
||||
// right to left
|
||||
"movl %%eax, %%ecx \n\t"
|
||||
"movl %[pixWidth], %%ecx \n\t"
|
||||
"decl %%ecx \n\t"
|
||||
|
||||
"pixLoopER: \n\t"
|
||||
"2: \n\t" // pixLoopER
|
||||
"movd (%%esi), %%mm1 \n\t"
|
||||
"punpcklbw %%mm0, %%mm1 \n\t"
|
||||
"pand (" ASMSYM(mmErrDiffMask) "), %%mm1 \n\t"
|
||||
|
@ -1133,30 +1130,30 @@ allDoneE:
|
|||
|
||||
// spread errors
|
||||
"paddusb -4(%%esi), %%mm7 \n\t"
|
||||
"paddusb -4(%%esi, %%ebx, 4), %%mm1 \n\t"
|
||||
"paddusb 0(%%esi, %%ebx, 4), %%mm5 \n\t"
|
||||
"paddusb 4(%%esi, %%ebx, 4), %%mm3 \n\t" // !!!! possible memory leak?
|
||||
"paddusb -4(%%esi, %[pixCanvasWidth], 4), %%mm1 \n\t"
|
||||
"paddusb 0(%%esi, %[pixCanvasWidth], 4), %%mm5 \n\t"
|
||||
"paddusb 4(%%esi, %[pixCanvasWidth], 4), %%mm3 \n\t" // !!!! possible memory leak?
|
||||
"movd %%mm7, -4(%%esi) \n\t"
|
||||
"movd %%mm1, -4(%%esi, %%ebx, 4) \n\t"
|
||||
"movd %%mm5, 0(%%esi, %%ebx, 4) \n\t"
|
||||
"movd %%mm3, 4(%%esi, %%ebx, 4) \n\t"
|
||||
"movd %%mm1, -4(%%esi, %[pixCanvasWidth], 4) \n\t"
|
||||
"movd %%mm5, 0(%%esi, %[pixCanvasWidth], 4) \n\t"
|
||||
"movd %%mm3, 4(%%esi, %[pixCanvasWidth], 4) \n\t"
|
||||
|
||||
// revert to previous pixel
|
||||
"subl $4, %%esi \n\t"
|
||||
"decl %%ecx \n\t"
|
||||
"jnz pixLoopER \n\t"
|
||||
"jnz 2b \n\t" // pixLoopER
|
||||
|
||||
// advance to next row
|
||||
"leal 0(%%esi, %%ebx, 4), %%esi \n\t"
|
||||
"leal 0(%%esi, %[pixCanvasWidth], 4), %%esi \n\t"
|
||||
"decl %%edx \n\t"
|
||||
"jnz rowLoopE \n\t"
|
||||
"allDoneE: \n\t"
|
||||
"popl %%ebx \n\t"
|
||||
"jnz 0b \n\t" // rowLoopE
|
||||
"3: \n\t" // allDoneE
|
||||
"emms \n\t"
|
||||
: // no outputs.
|
||||
: "S" (pulDst), "c" (pixCanvasWidth), "d" (pixHeight), "a" (pixWidth),
|
||||
"D" (slWidthModulo)
|
||||
: "cc", "memory"
|
||||
: [pulDst] "g" (pulDst), [pixCanvasWidth] "r" (pixCanvasWidth),
|
||||
[pixHeight] "g" (pixHeight), [pixWidth] "g" (pixWidth),
|
||||
[slWidthModulo] "g" (slWidthModulo)
|
||||
: FPU_REGS, MMX_REGS, "ecx", "edx", "esi", "cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
@ -1268,7 +1265,7 @@ extern "C" {
|
|||
}
|
||||
|
||||
|
||||
#if USE_PORTABLE_C
|
||||
#ifdef USE_PORTABLE_C
|
||||
typedef SWORD ExtPix[4];
|
||||
|
||||
static inline void extpix_fromi64(ExtPix &pix, const __int64 i64)
|
||||
|
@ -2535,7 +2532,8 @@ lowerLoop:
|
|||
"popl %%ebx \n\t"
|
||||
: // no outputs.
|
||||
: // inputs are all globals.
|
||||
: "eax", "ecx", "edx", "edi", "esi", "cc", "memory"
|
||||
: FPU_REGS, MMX_REGS, "eax", "ecx", "edx", "esi", "edi",
|
||||
"cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
|
|
@ -1363,6 +1363,13 @@ pixLoop:
|
|||
_pixBaseWidth_renderWater = pixBaseWidth;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
// this sucks :(
|
||||
"movl %[pixBaseHeight], %%eax \n\t"
|
||||
"movl %[pswHeightMap], %%ecx \n\t"
|
||||
"movl %[pulTexture], %%edx \n\t"
|
||||
"movl %[pulTextureBase], %%esi \n\t"
|
||||
"movl %[slHeightRowStep], %%edi \n\t"
|
||||
|
||||
"pushl %%ebx \n\t" // GCC needs this.
|
||||
"movl (" ASMSYM(_pixBaseWidth_renderWater) "),%%ebx \n\t"
|
||||
|
||||
|
@ -1444,9 +1451,13 @@ pixLoop:
|
|||
"popl %%ebx \n\t" // restore GCC's register.
|
||||
"emms \n\t"
|
||||
: // no outputs.
|
||||
: "a" (pixBaseHeight), "c" (pswHeightMap),
|
||||
"d" (pulTexture), "S" (pulTextureBase), "D" (slHeightRowStep)
|
||||
: "cc", "memory"
|
||||
: [pixBaseHeight] "g" (pixBaseHeight),
|
||||
[pswHeightMap] "g" (pswHeightMap),
|
||||
[pulTexture] "g" (pulTexture),
|
||||
[pulTextureBase] "g" (pulTextureBase),
|
||||
[slHeightRowStep] "g" (slHeightRowStep)
|
||||
: FPU_REGS, MMX_REGS, "eax", "ecx", "edx", "esi", "edi",
|
||||
"cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
@ -1617,9 +1628,7 @@ pixLoop2:
|
|||
|
||||
#elif (defined __GNU_INLINE__)
|
||||
__asm__ __volatile__ (
|
||||
"pushl %%ebx \n\t" // GCC's register.
|
||||
"movl %%ecx, %%ebx \n\t"
|
||||
"bsfl %%eax, %%eax \n\t" // pixBaseWidth
|
||||
"bsfl %[pixBaseWidth], %%eax \n\t"
|
||||
"movl $32, %%edx \n\t"
|
||||
"subl %%eax, %%edx \n\t"
|
||||
"movl %%edx, (" ASMSYM(mmBaseWidthShift) ") \n\t"
|
||||
|
@ -1631,11 +1640,11 @@ pixLoop2:
|
|||
|
||||
"pxor %%mm6, %%mm6 \n\t" // MM6 = pixV|pixU
|
||||
|
||||
// (These registers were loaded here in the original version...)
|
||||
//"movl (pswHeightMap), %%ebx \n\t"
|
||||
//"movl (pulTextureBase), %%esi \n\t"
|
||||
//"movl (pulTexture), %%edi \n\t"
|
||||
|
||||
"movl %[pswHeightMap], %%edx \n\t"
|
||||
"movl %[pulTextureBase], %%esi \n\t"
|
||||
"movl %[pulTexture], %%edi \n\t"
|
||||
"pushl %%ebx \n\t" // GCC's register.
|
||||
"movl %%edx, %%ebx \n\t"
|
||||
"movl (" ASMSYM(_pixBufferHeight) "), %%edx \n\t"
|
||||
|
||||
"0: \n\t" // rowLoop2
|
||||
|
@ -1753,9 +1762,12 @@ pixLoop2:
|
|||
"popl %%ebx \n\t" // GCC's value.
|
||||
"emms \n\t"
|
||||
: // no outputs.
|
||||
: "a" (pixBaseWidth), "c" (pswHeightMap),
|
||||
"S" (pulTextureBase), "D" (pulTexture)
|
||||
: "edx", "cc", "memory"
|
||||
: [pixBaseWidth] "g" (pixBaseWidth),
|
||||
[pswHeightMap] "g" (pswHeightMap),
|
||||
[pulTextureBase] "g" (pulTextureBase),
|
||||
[pulTexture] "g" (pulTexture)
|
||||
: FPU_REGS, MMX_REGS, "eax", "ecx", "edx", "esi", "edi",
|
||||
"cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
@ -2136,26 +2148,24 @@ pixLoop4:
|
|||
|
||||
#elif (defined __GNU_INLINE__)
|
||||
__asm__ __volatile__ (
|
||||
"pushl %%ebx \n\t" // GCC's register.
|
||||
"movl %%ecx, %%ebx \n\t"
|
||||
"bsfl %%eax, %%eax \n\t"
|
||||
"movl $32, %%edx \n\t"
|
||||
"subl %%eax, %%edx \n\t"
|
||||
"movl %%edx, (" ASMSYM(mmBaseWidthShift) ") \n\t"
|
||||
"bsfl %[pixBaseWidth], %%eax \n\t"
|
||||
"movl $32, %%edx \n\t"
|
||||
"subl %%eax, %%edx \n\t"
|
||||
"movl %%edx, (" ASMSYM(mmBaseWidthShift) ") \n\t"
|
||||
|
||||
"movq (" ASMSYM(mmBaseHeightMask) "), %%mm0 \n\t"
|
||||
"psllq $32, %%mm0 \n\t"
|
||||
"por (" ASMSYM(mmBaseWidthMask) "), %%mm0 \n\t"
|
||||
"movq %%mm0, (" ASMSYM(mmBaseMasks) ") \n\t"
|
||||
"movq (" ASMSYM(mmBaseHeightMask) "), %%mm0 \n\t"
|
||||
"psllq $32, %%mm0 \n\t"
|
||||
"por (" ASMSYM(mmBaseWidthMask) "), %%mm0 \n\t"
|
||||
"movq %%mm0, (" ASMSYM(mmBaseMasks) ") \n\t"
|
||||
|
||||
"pxor %%mm6, %%mm6 \n\t" // MM6 = pixV|pixU
|
||||
"pxor %%mm6, %%mm6 \n\t" // MM6 = pixV|pixU
|
||||
|
||||
// (These registers were loaded here in the original version...)
|
||||
//"movl (pswHeightMap), %%ebx \n\t"
|
||||
//"movl (pulTextureBase), %%esi \n\t"
|
||||
//"movl (pulTexture), %%edi \n\t"
|
||||
|
||||
"movl (" ASMSYM(_pixBufferHeight) "), %%edx \n\t"
|
||||
"movl %[pswHeightMap], %%edx \n\t"
|
||||
"movl %[pulTextureBase], %%esi \n\t"
|
||||
"movl %[pulTexture], %%edi \n\t"
|
||||
"pushl %%ebx \n\t" // GCC's register.
|
||||
"movl %%edx, %%ebx \n\t"
|
||||
"movl (" ASMSYM(_pixBufferHeight) "), %%edx \n\t"
|
||||
"0: \n\t" // rowLoop4
|
||||
"pushl %%edx \n\t"
|
||||
"movl (" ASMSYM(_pixBufferWidth) "), %%ecx \n\t"
|
||||
|
@ -2485,9 +2495,12 @@ pixLoop4:
|
|||
"popl %%ebx \n\t" // Restore GCC's value.
|
||||
"emms \n\t"
|
||||
: // no outputs.
|
||||
: "a" (pixBaseWidth), "c" (pswHeightMap),
|
||||
"S" (pulTextureBase), "D" (pulTexture)
|
||||
: "edx", "cc", "memory"
|
||||
: [pixBaseWidth] "g" (pixBaseWidth),
|
||||
[pswHeightMap] "g" (pswHeightMap),
|
||||
[pulTextureBase] "g" (pulTextureBase),
|
||||
[pulTexture] "g" (pulTexture)
|
||||
: FPU_REGS, MMX_REGS, "eax", "ecx", "edx", "esi", "edi",
|
||||
"cc", "memory"
|
||||
);
|
||||
|
||||
|
||||
|
@ -2965,6 +2978,11 @@ pixDone:
|
|||
|
||||
#elif (defined __GNU_INLINE__)
|
||||
__asm__ __volatile__ (
|
||||
"movl %[slColumnModulo], %%edx \n\t"
|
||||
"movl %[slBufferMask], %%ecx \n\t"
|
||||
"movl %[slDensity], %%eax \n\t"
|
||||
"movl (" ASMSYM(ulRNDSeed) "), %%edi \n\t"
|
||||
|
||||
"pushl %%ebx \n\t" // GCC's register.
|
||||
"xorl %%ebx, %%ebx \n\t"
|
||||
"pushl %%edx \n\t" // slColumnModulo
|
||||
|
@ -2977,7 +2995,7 @@ pixDone:
|
|||
|
||||
"1: \n\t" // rowLoopFM
|
||||
"movl (" ASMSYM(_pixBufferWidth) "), %%edx \n\t"
|
||||
"addl %%esi, %%edx \n\t"
|
||||
"addl %[pubNew], %%edx \n\t"
|
||||
"movzbl (%%ebx, %%edx), %%eax \n\t"
|
||||
"addl (" ASMSYM(_pixBufferWidth) "), %%edx \n\t"
|
||||
"movzbl (%%ebx, %%edx), %%edx \n\t"
|
||||
|
@ -2985,7 +3003,7 @@ pixDone:
|
|||
"shrl $1, %%eax \n\t"
|
||||
"cmpl (%%esp), %%eax \n\t"
|
||||
"jg doCalc_animateFire \n\t"
|
||||
"movb $0, (%%esi, %%ebx) \n\t"
|
||||
"movb $0, (%[pubNew], %%ebx) \n\t"
|
||||
"jmp pixDone_animateFire \n\t"
|
||||
|
||||
"doCalc_animateFire: \n\t"
|
||||
|
@ -2996,7 +3014,7 @@ pixDone:
|
|||
"movsbl " ASMSYM(asbMod3Sub1Table) "(%%edx), %%edx \n\t"
|
||||
"addl %%ebx, %%edx \n\t"
|
||||
"andl 4(%%esp), %%edx \n\t" // slBufferMask
|
||||
"movb %%al, (%%esi, %%edx) \n\t"
|
||||
"movb %%al, (%[pubNew], %%edx) \n\t"
|
||||
"imull $262147, %%edi \n\t"
|
||||
|
||||
"pixDone_animateFire: \n\t"
|
||||
|
@ -3015,9 +3033,10 @@ pixDone:
|
|||
"addl $12, %%esp \n\t" // lose our locals.
|
||||
"popl %%ebx \n\t" // Restore GCC's var.
|
||||
: // no outputs.
|
||||
: "a" (slDensity), "c" (slBufferMask),
|
||||
"d" (slColumnModulo), "D" (ulRNDSeed), "S" (pubNew)
|
||||
: "cc", "memory"
|
||||
: [slBufferMask] "g" (slBufferMask),
|
||||
[slColumnModulo] "g" (slColumnModulo),
|
||||
[pubNew] "r" (pubNew), [slDensity] "g" (slDensity)
|
||||
: "eax", "ecx", "edx", "edi", "cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
@ -3103,6 +3122,12 @@ pixLoopF:
|
|||
#elif (defined __GNU_INLINE__)
|
||||
_pubHeat_RenderPlasmaFire = pubHeat; // ran out of registers. :/
|
||||
__asm__ __volatile__ (
|
||||
"movl %[slHeatRowStep], %%eax \n\t"
|
||||
"movl %[slHeatMapStep], %%edx \n\t"
|
||||
"movl %[slBaseMipShift], %%ecx \n\t"
|
||||
"movl %[pulTextureBase], %%esi \n\t"
|
||||
"movl %[pulTexture], %%edi \n\t"
|
||||
|
||||
"pushl %%ebx \n\t"
|
||||
"movl (" ASMSYM(_pubHeat_RenderPlasmaFire) "),%%ebx \n\t"
|
||||
"pushl %%eax \n\t" // slHeatRowStep
|
||||
|
@ -3131,9 +3156,12 @@ pixLoopF:
|
|||
"addl $12, %%esp \n\t" // lose our locals.
|
||||
"popl %%ebx \n\t" // restore GCC's register.
|
||||
: // no outputs.
|
||||
: "S" (pulTextureBase), "D" (pulTexture),
|
||||
"c" (slBaseMipShift), "a" (slHeatRowStep), "d" (slHeatMapStep)
|
||||
: "cc", "memory"
|
||||
: [pulTextureBase] "g" (pulTextureBase),
|
||||
[pulTexture] "g" (pulTexture),
|
||||
[slBaseMipShift] "g" (slBaseMipShift),
|
||||
[slHeatRowStep] "g" (slHeatRowStep),
|
||||
[slHeatMapStep] "g" (slHeatMapStep)
|
||||
: "eax", "ecx", "edx", "esi", "edi", "cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
|
|
@ -125,6 +125,7 @@ static inline PIX PIXCoord(FLOAT f) // (f+0.9999f) or (ceil(f))
|
|||
|
||||
#elif (defined __GNU_INLINE__)
|
||||
PIX pixRet;
|
||||
SLONG clobber;
|
||||
__asm__ __volatile__ (
|
||||
"flds (%%eax) \n\t"
|
||||
"fistl (%%edx) \n\t"
|
||||
|
@ -134,7 +135,7 @@ static inline PIX PIXCoord(FLOAT f) // (f+0.9999f) or (ceil(f))
|
|||
"movl (%%ecx), %%edx \n\t"
|
||||
"addl $0x7FFFFFFF, %%edx \n\t"
|
||||
"adcl $0, %%eax \n\t"
|
||||
: "=a" (pixRet)
|
||||
: "=a" (pixRet), "=d" (clobber)
|
||||
: "a" (&f), "d" (&slTmp), "c" (&fDiff)
|
||||
: "cc", "memory"
|
||||
);
|
||||
|
|
Loading…
Reference in New Issue
Block a user