mirror of
https://github.com/ptitSeb/Serious-Engine
synced 2024-11-22 10:20:26 +01:00
commit
a8c6c77309
|
@ -188,13 +188,20 @@ else()
|
||||||
set(DEBUGSUFFIX "")
|
set(DEBUGSUFFIX "")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# This should not be needed anymore, but might be faster on 32bit x86
|
option(USE_ASM "Use ASM code" TRUE)
|
||||||
option(USE_I386_ASM "Use X86 ASM" FALSE)
|
if (USE_ASM)
|
||||||
|
MESSAGE(STATUS "Using assembler code (when available)")
|
||||||
|
else()
|
||||||
|
add_definitions(-DUSE_PORTABLE_C=1)
|
||||||
|
MESSAGE(STATUS "Using portable C instead of all ASM")
|
||||||
|
endif()
|
||||||
|
|
||||||
if (USE_I386_ASM)
|
option(USE_I386_NASM_ASM "Use i386 nasm ASM code" FALSE)
|
||||||
|
|
||||||
|
if (USE_ASM AND USE_I386_NASM_ASM)
|
||||||
# You need the Netwide Assembler (NASM) to build this on Intel systems.
|
# You need the Netwide Assembler (NASM) to build this on Intel systems.
|
||||||
# http://nasm.sf.net/
|
# http://nasm.sf.net/
|
||||||
add_definitions(-DUSE_I386_ASM=1)
|
add_definitions(-DUSE_I386_NASM_ASM=1)
|
||||||
if (MACOSX)
|
if (MACOSX)
|
||||||
set(ASMOBJFMT "macho")
|
set(ASMOBJFMT "macho")
|
||||||
list(APPEND ASMFLAGS --prefix _)
|
list(APPEND ASMFLAGS --prefix _)
|
||||||
|
@ -203,10 +210,9 @@ if (USE_I386_ASM)
|
||||||
else()
|
else()
|
||||||
set(ASMOBJFMT "elf")
|
set(ASMOBJFMT "elf")
|
||||||
endif()
|
endif()
|
||||||
MESSAGE(STATUS "Using i386 assembler")
|
MESSAGE(STATUS "Using i386 nasm ASM")
|
||||||
else()
|
else()
|
||||||
add_definitions(-DUSE_PORTABLE_C=1)
|
MESSAGE(STATUS "Not using i386 nasm ASM")
|
||||||
MESSAGE(STATUS "Using portable C instead of ASM")
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
option(PANDORA "Compile for Pandora" FALSE)
|
option(PANDORA "Compile for Pandora" FALSE)
|
||||||
|
@ -655,7 +661,7 @@ add_dependencies(${SHADERSLIB} ParseEntities)
|
||||||
add_parser_and_scanner("Engine/Base/Parser" "Engine/Base/Scanner")
|
add_parser_and_scanner("Engine/Base/Parser" "Engine/Base/Scanner")
|
||||||
add_parser_and_scanner("Engine/Ska/smcPars" "Engine/Ska/smcScan")
|
add_parser_and_scanner("Engine/Ska/smcPars" "Engine/Ska/smcScan")
|
||||||
|
|
||||||
if (USE_I386_ASM)
|
if (USE_I386_NASM_ASM)
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT "SoundMixer386.o"
|
OUTPUT "SoundMixer386.o"
|
||||||
MAIN_DEPENDENCY "${CMAKE_CURRENT_SOURCE_DIR}/Engine/Sound/SoundMixer386.asm"
|
MAIN_DEPENDENCY "${CMAKE_CURRENT_SOURCE_DIR}/Engine/Sound/SoundMixer386.asm"
|
||||||
|
|
|
@ -65,9 +65,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
#else
|
#else
|
||||||
#warning "UNKNOWN PLATFORM IDENTIFIED!!!!"
|
#warning "UNKNOWN PLATFORM IDENTIFIED!!!!"
|
||||||
#define PLATFORM_UNKNOWN 1
|
#define PLATFORM_UNKNOWN 1
|
||||||
#warning "USING PORTABLE C!!!"
|
#endif
|
||||||
#define USE_PORTABLE_C
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if PLATFORM_LINUX || PLATFORM_MACOSX
|
#if PLATFORM_LINUX || PLATFORM_MACOSX
|
||||||
#ifndef PLATFORM_UNIX
|
#ifndef PLATFORM_UNIX
|
||||||
|
|
|
@ -21,24 +21,13 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
template class CStaticArray<CProfileCounter>;
|
template class CStaticArray<CProfileCounter>;
|
||||||
template class CStaticArray<CProfileTimer>;
|
template class CStaticArray<CProfileTimer>;
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined PLATFORM_UNIX) && !defined(__GNU_INLINE_X86_32__)
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline __int64 ReadTSC_profile(void)
|
static inline __int64 ReadTSC_profile(void)
|
||||||
{
|
{
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
#ifdef __arm__
|
|
||||||
struct timespec tv;
|
|
||||||
clock_gettime(CLOCK_MONOTONIC, &tv);
|
|
||||||
return( (((__int64) tv.tv_sec) * 1000) + (((__int64) tv.tv_nsec) / 1000000) );
|
|
||||||
#else
|
|
||||||
struct timeval tv;
|
|
||||||
gettimeofday(&tv, NULL);
|
|
||||||
return( (((__int64) tv.tv_sec) * 1000) + (((__int64) tv.tv_usec) / 1000) );
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__int64 mmRet;
|
__int64 mmRet;
|
||||||
__asm {
|
__asm {
|
||||||
rdtsc
|
rdtsc
|
||||||
|
@ -47,7 +36,7 @@ static inline __int64 ReadTSC_profile(void)
|
||||||
}
|
}
|
||||||
return mmRet;
|
return mmRet;
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__int64 mmRet;
|
__int64 mmRet;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"rdtsc \n\t"
|
"rdtsc \n\t"
|
||||||
|
@ -60,7 +49,16 @@ static inline __int64 ReadTSC_profile(void)
|
||||||
return(mmRet);
|
return(mmRet);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error Please implement for your platform/compiler.
|
#ifdef __arm__
|
||||||
|
struct timespec tv;
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &tv);
|
||||||
|
return( (((__int64) tv.tv_sec) * 1000) + (((__int64) tv.tv_nsec) / 1000000) );
|
||||||
|
#else
|
||||||
|
struct timeval tv;
|
||||||
|
gettimeofday(&tv, NULL);
|
||||||
|
return( (((__int64) tv.tv_sec) * 1000) + (((__int64) tv.tv_usec) / 1000) );
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
#include <Engine/Base/Priority.inl>
|
#include <Engine/Base/Priority.inl>
|
||||||
|
|
||||||
// !!! FIXME: use SDL timer code instead and rdtsc never?
|
// !!! FIXME: use SDL timer code instead and rdtsc never?
|
||||||
#if (USE_PORTABLE_C)
|
#if (defined PLATFORM_UNIX) && !defined(__GNU_INLINE_X86_32__)
|
||||||
#define USE_GETTIMEOFDAY 1
|
#define USE_GETTIMEOFDAY 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -64,7 +64,7 @@ static inline __int64 ReadTSC(void)
|
||||||
}
|
}
|
||||||
return mmRet;
|
return mmRet;
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__int64 mmRet;
|
__int64 mmRet;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"rdtsc \n\t"
|
"rdtsc \n\t"
|
||||||
|
|
|
@ -109,6 +109,30 @@ MY_STATIC_ASSERT(size_tSize, sizeof(size_t) == sizeof(void*));
|
||||||
#define ASMSYM(x) #x
|
#define ASMSYM(x) #x
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* should we enable inline asm? */
|
||||||
|
#ifndef USE_PORTABLE_C
|
||||||
|
#if defined(__MSVC_INLINE__)
|
||||||
|
/* the build system selected __MSVC_INLINE__ */
|
||||||
|
#elif defined(__GNU_INLINE_X86_32__)
|
||||||
|
/* the build system selected __GNU_INLINE_X86_32__ */
|
||||||
|
#elif defined(_MSC_VER) && defined(_M_IX86)
|
||||||
|
#define __MSVC_INLINE__
|
||||||
|
#elif defined (__GNUC__) && defined(__i386)
|
||||||
|
#define __GNU_INLINE_X86_32__
|
||||||
|
#elif defined (__GNUC__) && defined(__x86_64__)
|
||||||
|
#define __GNU_INLINE_X86_64__
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNU_INLINE_X86_32__) || defined(__GNU_INLINE_X86_64__)
|
||||||
|
#define __GNU_INLINE_X86__
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNU_INLINE_X86__)
|
||||||
|
#define FPU_REGS "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
|
||||||
|
#define MMX_REGS "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef PLATFORM_UNIX /* rcg10042001 */
|
#ifdef PLATFORM_UNIX /* rcg10042001 */
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
@ -134,25 +158,6 @@ MY_STATIC_ASSERT(size_tSize, sizeof(size_t) == sizeof(void*));
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if ((defined __GNUC__) && (!defined __GNU_INLINE__))
|
|
||||||
#define __GNU_INLINE__
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if (defined __INTEL_COMPILER)
|
|
||||||
#if ((!defined __GNU_INLINE__) && (!defined __MSVC_INLINE__))
|
|
||||||
#error Please define __GNU_INLINE__ or __MSVC_INLINE__ with Intel C++.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if ((defined __GNU_INLINE__) && (defined __MSVC_INLINE__))
|
|
||||||
#error Define either __GNU_INLINE__ or __MSVC_INLINE__ with Intel C++.
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__GNU_INLINE__) && defined(__i386__)
|
|
||||||
#define FPU_REGS "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
|
|
||||||
#define MMX_REGS "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef PAGESIZE
|
#ifndef PAGESIZE
|
||||||
#define PAGESIZE 4096
|
#define PAGESIZE 4096
|
||||||
#endif
|
#endif
|
||||||
|
@ -230,10 +235,7 @@ MY_STATIC_ASSERT(size_tSize, sizeof(size_t) == sizeof(void*));
|
||||||
|
|
||||||
inline ULONG _rotl(ULONG ul, int bits)
|
inline ULONG _rotl(ULONG ul, int bits)
|
||||||
{
|
{
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __GNU_INLINE_X86_32__)
|
||||||
// DG: according to http://blog.regehr.org/archives/1063 this is fast
|
|
||||||
return (ul<<bits) | (ul>>(-bits&31));
|
|
||||||
#elif (defined __GNU_INLINE__)
|
|
||||||
// This, on the other hand, is wicked fast. :)
|
// This, on the other hand, is wicked fast. :)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"roll %%cl, %%eax \n\t"
|
"roll %%cl, %%eax \n\t"
|
||||||
|
@ -255,7 +257,8 @@ MY_STATIC_ASSERT(size_tSize, sizeof(size_t) == sizeof(void*));
|
||||||
return(ul);
|
return(ul);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error need inline asm for your platform.
|
// DG: according to http://blog.regehr.org/archives/1063 this is fast
|
||||||
|
return (ul<<bits) | (ul>>(-bits&31));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -125,14 +125,10 @@ BOOL APIENTRY DllMain( HANDLE hModule, DWORD ul_reason_for_call, LPVOID lpReser
|
||||||
|
|
||||||
static void DetectCPU(void)
|
static void DetectCPU(void)
|
||||||
{
|
{
|
||||||
#if (defined USE_PORTABLE_C) // rcg10072001
|
char strVendor[12+1] = { 0 };
|
||||||
CPrintF(TRANSV(" (No CPU detection in this binary.)\n"));
|
|
||||||
|
|
||||||
#else
|
|
||||||
char strVendor[12+1];
|
|
||||||
strVendor[12] = 0;
|
strVendor[12] = 0;
|
||||||
ULONG ulTFMS;
|
ULONG ulTFMS = 0;
|
||||||
ULONG ulFeatures;
|
ULONG ulFeatures = 0;
|
||||||
|
|
||||||
#if (defined __MSVC_INLINE__)
|
#if (defined __MSVC_INLINE__)
|
||||||
// test MMX presence and update flag
|
// test MMX presence and update flag
|
||||||
|
@ -148,43 +144,47 @@ static void DetectCPU(void)
|
||||||
mov dword ptr [ulFeatures], edx
|
mov dword ptr [ulFeatures], edx
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86__)
|
||||||
|
ULONG eax, ebx, ecx, edx;
|
||||||
// test MMX presence and update flag
|
// test MMX presence and update flag
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"pushl %%ebx \n\t"
|
#if (defined __GNU_INLINE_X86_64__)
|
||||||
"xorl %%eax,%%eax \n\t" // request for basic id
|
|
||||||
"cpuid \n\t"
|
"cpuid \n\t"
|
||||||
"movl %%ebx, (%%esi) \n\t"
|
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
|
||||||
"movl %%edx, 4(%%esi) \n\t"
|
#else
|
||||||
"movl %%ecx, 8(%%esi) \n\t"
|
"movl %%ebx, %%esi \n\t"
|
||||||
"popl %%ebx \n\t"
|
"cpuid \n\t"
|
||||||
: // no specific outputs.
|
"xchgl %%ebx, %%esi \n\t"
|
||||||
: "S" (strVendor)
|
: "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)
|
||||||
: "eax", "ecx", "edx", "memory"
|
#endif
|
||||||
|
: "a" (0) // request for basic id
|
||||||
);
|
);
|
||||||
|
memcpy(strVendor + 0, &ebx, 4);
|
||||||
// need to break this into a separate asm block, since I'm clobbering
|
memcpy(strVendor + 4, &edx, 4);
|
||||||
// too many registers. There's something to be said for letting MSVC
|
memcpy(strVendor + 8, &ecx, 4);
|
||||||
// figure out where on the stack your locals are resting, but yeah,
|
|
||||||
// I know, that's x86-specific anyhow...
|
|
||||||
// !!! FIXME: can probably do this right with modern GCC.
|
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"pushl %%ebx \n\t"
|
#if (defined __GNU_INLINE_X86_64__)
|
||||||
"movl $1, %%eax \n\t" // request for TFMS feature flags
|
"cpuid \n\t"
|
||||||
"cpuid \n\t"
|
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
|
||||||
"mov %%eax, (%%esi) \n\t" // remember type, family, model and stepping
|
#else
|
||||||
"mov %%edx, (%%edi) \n\t"
|
"movl %%ebx, %%esi \n\t"
|
||||||
"popl %%ebx \n\t"
|
"cpuid \n\t"
|
||||||
: // no specific outputs.
|
"xchgl %%ebx, %%esi \n\t"
|
||||||
: "S" (&ulTFMS), "D" (&ulFeatures)
|
: "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)
|
||||||
: "eax", "ecx", "edx", "memory"
|
#endif
|
||||||
|
: "a" (1) // request for TFMS feature flags
|
||||||
);
|
);
|
||||||
|
ulTFMS = eax;
|
||||||
|
ulFeatures = edx;
|
||||||
|
|
||||||
#else
|
|
||||||
#error Please implement for your platform or define USE_PORTABLE_C.
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (ulTFMS == 0) {
|
||||||
|
CPrintF(TRANSV(" (No CPU detection in this binary.)\n"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
INDEX iType = (ulTFMS>>12)&0x3;
|
INDEX iType = (ulTFMS>>12)&0x3;
|
||||||
INDEX iFamily = (ulTFMS>> 8)&0xF;
|
INDEX iFamily = (ulTFMS>> 8)&0xF;
|
||||||
INDEX iModel = (ulTFMS>> 4)&0xF;
|
INDEX iModel = (ulTFMS>> 4)&0xF;
|
||||||
|
@ -215,8 +215,6 @@ static void DetectCPU(void)
|
||||||
sys_iCPUMHz = INDEX(_pTimer->tm_llCPUSpeedHZ/1E6);
|
sys_iCPUMHz = INDEX(_pTimer->tm_llCPUSpeedHZ/1E6);
|
||||||
|
|
||||||
if( !bMMX) FatalError( TRANS("MMX support required but not present!"));
|
if( !bMMX) FatalError( TRANS("MMX support required but not present!"));
|
||||||
|
|
||||||
#endif // defined USE_PORTABLE_C
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void DetectCPUWrapper(void)
|
static void DetectCPUWrapper(void)
|
||||||
|
|
|
@ -247,30 +247,7 @@ COLOR MulColors( COLOR col1, COLOR col2)
|
||||||
if( col2==0xFFFFFFFF) return col1;
|
if( col2==0xFFFFFFFF) return col1;
|
||||||
if( col1==0 || col2==0) return 0;
|
if( col1==0 || col2==0) return 0;
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
// !!! FIXME: This...is not fast.
|
|
||||||
union
|
|
||||||
{
|
|
||||||
COLOR col;
|
|
||||||
UBYTE bytes[4];
|
|
||||||
} conv1;
|
|
||||||
|
|
||||||
union
|
|
||||||
{
|
|
||||||
COLOR col;
|
|
||||||
UBYTE bytes[4];
|
|
||||||
} conv2;
|
|
||||||
|
|
||||||
conv1.col = col1;
|
|
||||||
conv2.col = col2;
|
|
||||||
conv1.bytes[0] = (UBYTE) ((((DWORD) conv1.bytes[0]) * ((DWORD) conv2.bytes[0])) / 255);
|
|
||||||
conv1.bytes[1] = (UBYTE) ((((DWORD) conv1.bytes[1]) * ((DWORD) conv2.bytes[1])) / 255);
|
|
||||||
conv1.bytes[2] = (UBYTE) ((((DWORD) conv1.bytes[2]) * ((DWORD) conv2.bytes[2])) / 255);
|
|
||||||
conv1.bytes[3] = (UBYTE) ((((DWORD) conv1.bytes[3]) * ((DWORD) conv2.bytes[3])) / 255);
|
|
||||||
|
|
||||||
return(conv1.col);
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
COLOR colRet;
|
COLOR colRet;
|
||||||
__asm {
|
__asm {
|
||||||
xor ebx,ebx
|
xor ebx,ebx
|
||||||
|
@ -347,7 +324,7 @@ COLOR MulColors( COLOR col1, COLOR col2)
|
||||||
}
|
}
|
||||||
return colRet;
|
return colRet;
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
COLOR colRet;
|
COLOR colRet;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"pushl %%ebx \n\t"
|
"pushl %%ebx \n\t"
|
||||||
|
@ -433,20 +410,6 @@ COLOR MulColors( COLOR col1, COLOR col2)
|
||||||
|
|
||||||
return colRet;
|
return colRet;
|
||||||
#else
|
#else
|
||||||
#error please fill in inline assembly for your platform.
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// fast color additon function - RES = clamp (1ST + 2ND)
|
|
||||||
COLOR AddColors( COLOR col1, COLOR col2)
|
|
||||||
{
|
|
||||||
if( col1==0) return col2;
|
|
||||||
if( col2==0) return col1;
|
|
||||||
if( col1==0xFFFFFFFF || col2==0xFFFFFFFF) return 0xFFFFFFFF;
|
|
||||||
COLOR colRet;
|
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
|
||||||
// !!! FIXME: This...is not fast.
|
// !!! FIXME: This...is not fast.
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
|
@ -459,19 +422,28 @@ COLOR AddColors( COLOR col1, COLOR col2)
|
||||||
COLOR col;
|
COLOR col;
|
||||||
UBYTE bytes[4];
|
UBYTE bytes[4];
|
||||||
} conv2;
|
} conv2;
|
||||||
#define MINVAL(a, b) ((a)>(b))?(b):(a)
|
|
||||||
|
|
||||||
conv1.col = col1;
|
conv1.col = col1;
|
||||||
conv2.col = col2;
|
conv2.col = col2;
|
||||||
conv1.bytes[0] = (UBYTE) MINVAL((((WORD) conv1.bytes[0]) + ((WORD) conv2.bytes[0])) , 255);
|
conv1.bytes[0] = (UBYTE) ((((DWORD) conv1.bytes[0]) * ((DWORD) conv2.bytes[0])) / 255);
|
||||||
conv1.bytes[1] = (UBYTE) MINVAL((((WORD) conv1.bytes[1]) + ((WORD) conv2.bytes[1])) , 255);
|
conv1.bytes[1] = (UBYTE) ((((DWORD) conv1.bytes[1]) * ((DWORD) conv2.bytes[1])) / 255);
|
||||||
conv1.bytes[2] = (UBYTE) MINVAL((((WORD) conv1.bytes[2]) + ((WORD) conv2.bytes[2])) , 255);
|
conv1.bytes[2] = (UBYTE) ((((DWORD) conv1.bytes[2]) * ((DWORD) conv2.bytes[2])) / 255);
|
||||||
conv1.bytes[3] = (UBYTE) MINVAL((((WORD) conv1.bytes[3]) + ((WORD) conv2.bytes[3])) , 255);
|
conv1.bytes[3] = (UBYTE) ((((DWORD) conv1.bytes[3]) * ((DWORD) conv2.bytes[3])) / 255);
|
||||||
#undef MINVAL
|
|
||||||
|
|
||||||
colRet = conv1.col;
|
return(conv1.col);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
|
// fast color additon function - RES = clamp (1ST + 2ND)
|
||||||
|
COLOR AddColors( COLOR col1, COLOR col2)
|
||||||
|
{
|
||||||
|
if( col1==0) return col2;
|
||||||
|
if( col2==0) return col1;
|
||||||
|
if( col1==0xFFFFFFFF || col2==0xFFFFFFFF) return 0xFFFFFFFF;
|
||||||
|
COLOR colRet;
|
||||||
|
|
||||||
|
#if (defined __MSVC_INLINE__)
|
||||||
__asm {
|
__asm {
|
||||||
xor ebx,ebx
|
xor ebx,ebx
|
||||||
mov esi,255
|
mov esi,255
|
||||||
|
@ -535,7 +507,7 @@ COLOR AddColors( COLOR col1, COLOR col2)
|
||||||
mov D [colRet],ebx
|
mov D [colRet],ebx
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
ULONG tmp;
|
ULONG tmp;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
// if xbx is "r", gcc runs out of regs in -fPIC + -fno-omit-fp :(
|
// if xbx is "r", gcc runs out of regs in -fPIC + -fno-omit-fp :(
|
||||||
|
@ -608,7 +580,29 @@ COLOR AddColors( COLOR col1, COLOR col2)
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error please fill in inline assembly for your platform.
|
// !!! FIXME: This...is not fast.
|
||||||
|
union
|
||||||
|
{
|
||||||
|
COLOR col;
|
||||||
|
UBYTE bytes[4];
|
||||||
|
} conv1;
|
||||||
|
|
||||||
|
union
|
||||||
|
{
|
||||||
|
COLOR col;
|
||||||
|
UBYTE bytes[4];
|
||||||
|
} conv2;
|
||||||
|
#define MINVAL(a, b) ((a)>(b))?(b):(a)
|
||||||
|
|
||||||
|
conv1.col = col1;
|
||||||
|
conv2.col = col2;
|
||||||
|
conv1.bytes[0] = (UBYTE) MINVAL((((WORD) conv1.bytes[0]) + ((WORD) conv2.bytes[0])) , 255);
|
||||||
|
conv1.bytes[1] = (UBYTE) MINVAL((((WORD) conv1.bytes[1]) + ((WORD) conv2.bytes[1])) , 255);
|
||||||
|
conv1.bytes[2] = (UBYTE) MINVAL((((WORD) conv1.bytes[2]) + ((WORD) conv2.bytes[2])) , 255);
|
||||||
|
conv1.bytes[3] = (UBYTE) MINVAL((((WORD) conv1.bytes[3]) + ((WORD) conv2.bytes[3])) , 255);
|
||||||
|
#undef MINVAL
|
||||||
|
|
||||||
|
colRet = conv1.col;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return colRet;
|
return colRet;
|
||||||
|
@ -619,14 +613,7 @@ COLOR AddColors( COLOR col1, COLOR col2)
|
||||||
// multiple conversion from OpenGL color to DirectX color
|
// multiple conversion from OpenGL color to DirectX color
|
||||||
extern void abgr2argb( ULONG *pulSrc, ULONG *pulDst, INDEX ct)
|
extern void abgr2argb( ULONG *pulSrc, ULONG *pulDst, INDEX ct)
|
||||||
{
|
{
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
//#error write me.
|
|
||||||
for (int i=0; i<ct; i++) {
|
|
||||||
ULONG tmp = pulSrc[i];
|
|
||||||
pulDst[i] = (tmp&0xff00ff00) | ((tmp&0x00ff0000)>>16) | ((tmp&0x000000ff)<<16);
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
mov esi,dword ptr [pulSrc]
|
mov esi,dword ptr [pulSrc]
|
||||||
mov edi,dword ptr [pulDst]
|
mov edi,dword ptr [pulDst]
|
||||||
|
@ -678,12 +665,12 @@ colSkip2:
|
||||||
mov dword ptr [edi],eax
|
mov dword ptr [edi],eax
|
||||||
colSkip1:
|
colSkip1:
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
|
||||||
STUBBED("convert to inline asm.");
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error please fill in inline assembly for your platform.
|
for (int i=0; i<ct; i++) {
|
||||||
|
ULONG tmp = pulSrc[i];
|
||||||
|
pulDst[i] = (tmp&0xff00ff00) | ((tmp&0x00ff0000)>>16) | ((tmp&0x000000ff)<<16);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -204,19 +204,7 @@ ENGINE_API extern COLOR AddColors( COLOR col1, COLOR col2); // fast color addito
|
||||||
__forceinline ULONG ByteSwap( ULONG ul)
|
__forceinline ULONG ByteSwap( ULONG ul)
|
||||||
{
|
{
|
||||||
/* rcg10052001 Platform-wrappers. */
|
/* rcg10052001 Platform-wrappers. */
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
ul = ( ((ul << 24) ) |
|
|
||||||
((ul << 8) & 0x00FF0000) |
|
|
||||||
((ul >> 8) & 0x0000FF00) |
|
|
||||||
((ul >> 24) ) );
|
|
||||||
|
|
||||||
#if (defined PLATFORM_BIGENDIAN)
|
|
||||||
BYTESWAP(ul); // !!! FIXME: May not be right!
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return(ul);
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
ULONG ulRet;
|
ULONG ulRet;
|
||||||
__asm {
|
__asm {
|
||||||
mov eax,dword ptr [ul]
|
mov eax,dword ptr [ul]
|
||||||
|
@ -225,7 +213,7 @@ __forceinline ULONG ByteSwap( ULONG ul)
|
||||||
}
|
}
|
||||||
return ulRet;
|
return ulRet;
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"bswapl %%eax \n\t"
|
"bswapl %%eax \n\t"
|
||||||
: "=a" (ul)
|
: "=a" (ul)
|
||||||
|
@ -234,16 +222,22 @@ __forceinline ULONG ByteSwap( ULONG ul)
|
||||||
return(ul);
|
return(ul);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error please define for your platform.
|
ul = ( ((ul << 24) ) |
|
||||||
|
((ul << 8) & 0x00FF0000) |
|
||||||
|
((ul >> 8) & 0x0000FF00) |
|
||||||
|
((ul >> 24) ) );
|
||||||
|
|
||||||
|
#if (defined PLATFORM_BIGENDIAN)
|
||||||
|
BYTESWAP(ul); // !!! FIXME: May not be right!
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return(ul);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline ULONG rgba2argb( ULONG ul)
|
__forceinline ULONG rgba2argb( ULONG ul)
|
||||||
{
|
{
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
return( (ul << 24) | (ul >> 8) );
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
ULONG ulRet;
|
ULONG ulRet;
|
||||||
__asm {
|
__asm {
|
||||||
mov eax,dword ptr [ul]
|
mov eax,dword ptr [ul]
|
||||||
|
@ -252,7 +246,7 @@ __forceinline ULONG rgba2argb( ULONG ul)
|
||||||
}
|
}
|
||||||
return ulRet;
|
return ulRet;
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
ULONG ulRet;
|
ULONG ulRet;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"rorl $8, %%eax \n\t"
|
"rorl $8, %%eax \n\t"
|
||||||
|
@ -263,21 +257,14 @@ __forceinline ULONG rgba2argb( ULONG ul)
|
||||||
return ulRet;
|
return ulRet;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error please define for your platform.
|
return (ul << 24) | (ul >> 8);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline ULONG abgr2argb( COLOR col)
|
__forceinline ULONG abgr2argb( COLOR col)
|
||||||
{
|
{
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
// this could be simplified, this is just a safe conversion from asm code
|
|
||||||
col = ( ((col << 24) ) |
|
|
||||||
((col << 8) & 0x00FF0000) |
|
|
||||||
((col >> 8) & 0x0000FF00) |
|
|
||||||
((col >> 24) ) );
|
|
||||||
return( (col << 24) | (col >> 8) );
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
ULONG ulRet;
|
ULONG ulRet;
|
||||||
__asm {
|
__asm {
|
||||||
mov eax,dword ptr [col]
|
mov eax,dword ptr [col]
|
||||||
|
@ -287,7 +274,7 @@ __forceinline ULONG abgr2argb( COLOR col)
|
||||||
}
|
}
|
||||||
return ulRet;
|
return ulRet;
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
ULONG ulRet;
|
ULONG ulRet;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"bswapl %%eax \n\t"
|
"bswapl %%eax \n\t"
|
||||||
|
@ -299,7 +286,13 @@ __forceinline ULONG abgr2argb( COLOR col)
|
||||||
return ulRet;
|
return ulRet;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error please define for your platform.
|
// this could be simplified, this is just a safe conversion from asm code
|
||||||
|
col = ( ((col << 24) ) |
|
||||||
|
((col << 8) & 0x00FF0000) |
|
||||||
|
((col >> 8) & 0x0000FF00) |
|
||||||
|
((col >> 24) ) );
|
||||||
|
return( (col << 24) | (col >> 8) );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -311,10 +304,7 @@ extern void abgr2argb( ULONG *pulSrc, ULONG *pulDst, INDEX ct);
|
||||||
// fast memory copy of ULONGs
|
// fast memory copy of ULONGs
|
||||||
inline void CopyLongs( ULONG *pulSrc, ULONG *pulDst, INDEX ctLongs)
|
inline void CopyLongs( ULONG *pulSrc, ULONG *pulDst, INDEX ctLongs)
|
||||||
{
|
{
|
||||||
#if ((defined USE_PORTABLE_C) || (PLATFORM_MACOSX))
|
#if (defined __MSVC_INLINE__)
|
||||||
memcpy( pulDst, pulSrc, ctLongs*4);
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
cld
|
cld
|
||||||
mov esi,dword ptr [pulSrc]
|
mov esi,dword ptr [pulSrc]
|
||||||
|
@ -322,23 +312,8 @@ inline void CopyLongs( ULONG *pulSrc, ULONG *pulDst, INDEX ctLongs)
|
||||||
mov ecx,dword ptr [ctLongs]
|
mov ecx,dword ptr [ctLongs]
|
||||||
rep movsd
|
rep movsd
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
|
||||||
// I haven't benchmarked it, but in many cases, memcpy() becomes an
|
|
||||||
// inline (asm?) macro on GNU platforms, so this might not be a
|
|
||||||
// speed gain at all over the USE_PORTABLE_C version.
|
|
||||||
// You Have Been Warned. --ryan.
|
|
||||||
__asm__ __volatile__ (
|
|
||||||
"cld \n\t"
|
|
||||||
"rep \n\t"
|
|
||||||
"movsd \n\t"
|
|
||||||
: "=S" (pulSrc), "=D" (pulDst), "=c" (ctLongs)
|
|
||||||
: "S" (pulSrc), "D" (pulDst), "c" (ctLongs)
|
|
||||||
: "cc", "memory"
|
|
||||||
);
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
# error Please fill this in for your platform.
|
memcpy( pulDst, pulSrc, ctLongs*4);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -346,11 +321,7 @@ inline void CopyLongs( ULONG *pulSrc, ULONG *pulDst, INDEX ctLongs)
|
||||||
// fast memory set of ULONGs
|
// fast memory set of ULONGs
|
||||||
inline void StoreLongs( ULONG ulVal, ULONG *pulDst, INDEX ctLongs)
|
inline void StoreLongs( ULONG ulVal, ULONG *pulDst, INDEX ctLongs)
|
||||||
{
|
{
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
for( INDEX i=0; i<ctLongs; i++)
|
|
||||||
pulDst[i] = ulVal;
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
cld
|
cld
|
||||||
mov eax,dword ptr [ulVal]
|
mov eax,dword ptr [ulVal]
|
||||||
|
@ -359,7 +330,7 @@ inline void StoreLongs( ULONG ulVal, ULONG *pulDst, INDEX ctLongs)
|
||||||
rep stosd
|
rep stosd
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"cld \n\t"
|
"cld \n\t"
|
||||||
"rep \n\t"
|
"rep \n\t"
|
||||||
|
@ -370,7 +341,9 @@ inline void StoreLongs( ULONG ulVal, ULONG *pulDst, INDEX ctLongs)
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
# error Please fill this in for your platform.
|
for( INDEX i=0; i<ctLongs; i++)
|
||||||
|
pulDst[i] = ulVal;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -38,16 +38,6 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
#define W word ptr
|
#define W word ptr
|
||||||
#define B byte ptr
|
#define B byte ptr
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
|
||||||
#define ASMOPT 0
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
#define ASMOPT 1
|
|
||||||
#elif (defined __GNU_INLINE__)
|
|
||||||
#define ASMOPT 1
|
|
||||||
#else
|
|
||||||
#define ASMOPT 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define MAXTEXUNITS 4
|
#define MAXTEXUNITS 4
|
||||||
#define SHADOWTEXTURE 3
|
#define SHADOWTEXTURE 3
|
||||||
|
|
||||||
|
@ -153,8 +143,7 @@ void AddElements( ScenePolygon *pspo)
|
||||||
const INDEX ctElems = pspo->spo_ctElements;
|
const INDEX ctElems = pspo->spo_ctElements;
|
||||||
INDEX *piDst = _aiElements.Push(ctElems);
|
INDEX *piDst = _aiElements.Push(ctElems);
|
||||||
|
|
||||||
#if (ASMOPT == 1)
|
#if (defined __MSVC_INLINE__)
|
||||||
#if (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
mov eax,D [pspo]
|
mov eax,D [pspo]
|
||||||
mov ecx,D [ctElems]
|
mov ecx,D [ctElems]
|
||||||
|
@ -184,7 +173,7 @@ elemRest:
|
||||||
mov D [edi],eax
|
mov D [edi],eax
|
||||||
elemDone:
|
elemDone:
|
||||||
}
|
}
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"movl %[ctElems], %%ecx \n\t"
|
"movl %[ctElems], %%ecx \n\t"
|
||||||
"movl %[piDst], %%edi \n\t"
|
"movl %[piDst], %%edi \n\t"
|
||||||
|
@ -219,11 +208,6 @@ elemDone:
|
||||||
"cc", "memory"
|
"cc", "memory"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
|
||||||
#error Please write inline ASM for your platform.
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
const INDEX iVtx0Pass = pspo->spo_iVtx0Pass;
|
const INDEX iVtx0Pass = pspo->spo_iVtx0Pass;
|
||||||
const INDEX *piSrc = pspo->spo_piElements;
|
const INDEX *piSrc = pspo->spo_piElements;
|
||||||
|
@ -495,9 +479,7 @@ static void RSBinToGroups( ScenePolygon *pspoFirst)
|
||||||
// determine maximum used groups
|
// determine maximum used groups
|
||||||
ASSERT( _ctGroupsCount);
|
ASSERT( _ctGroupsCount);
|
||||||
|
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
|
|
||||||
#if (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
mov eax,2
|
mov eax,2
|
||||||
bsr ecx,D [_ctGroupsCount]
|
bsr ecx,D [_ctGroupsCount]
|
||||||
|
@ -505,7 +487,7 @@ static void RSBinToGroups( ScenePolygon *pspoFirst)
|
||||||
mov D [_ctGroupsCount],eax
|
mov D [_ctGroupsCount],eax
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"movl $2, %%eax \n\t"
|
"movl $2, %%eax \n\t"
|
||||||
"bsrl (%%esi), %%ecx \n\t"
|
"bsrl (%%esi), %%ecx \n\t"
|
||||||
|
@ -516,11 +498,6 @@ static void RSBinToGroups( ScenePolygon *pspoFirst)
|
||||||
: "eax", "ecx", "cc", "memory"
|
: "eax", "ecx", "cc", "memory"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
|
||||||
#error Please write inline ASM for your platform.
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
// emulate x86's bsr opcode...not fast. :/
|
// emulate x86's bsr opcode...not fast. :/
|
||||||
register DWORD val = _ctGroupsCount;
|
register DWORD val = _ctGroupsCount;
|
||||||
|
@ -858,10 +835,7 @@ static void RSSetTextureCoords( ScenePolygon *pspoGroup, INDEX iLayer, INDEX iUn
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// !!! FIXME: rcg11232001 This inline conversion is broken. Use the
|
#if (defined __MSVC_INLINE__)
|
||||||
// !!! FIXME: rcg11232001 C version for now with GCC.
|
|
||||||
#if ((ASMOPT == 1) && (!defined __GNU_INLINE__) && (!defined __INTEL_COMPILER))
|
|
||||||
#if (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
mov esi,D [pspo]
|
mov esi,D [pspo]
|
||||||
mov edi,D [iMappingOffset]
|
mov edi,D [iMappingOffset]
|
||||||
|
@ -915,7 +889,7 @@ vtxLoop:
|
||||||
/*
|
/*
|
||||||
// !!! FIXME: rcg11232001 This inline conversion is broken. Use the
|
// !!! FIXME: rcg11232001 This inline conversion is broken. Use the
|
||||||
// !!! FIXME: rcg11232001 C version for now on Linux.
|
// !!! FIXME: rcg11232001 C version for now on Linux.
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
STUBBED("debug this");
|
STUBBED("debug this");
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"0: \n\t" // vtxLoop
|
"0: \n\t" // vtxLoop
|
||||||
|
@ -956,11 +930,6 @@ vtxLoop:
|
||||||
);
|
);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#else
|
|
||||||
#error Please write inline ASM for your platform.
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
// diffuse mapping
|
// diffuse mapping
|
||||||
|
|
|
@ -67,18 +67,7 @@ ULONG PrepareTexture( UBYTE *pubTexture, PIX pixSizeI, PIX pixSizeJ)
|
||||||
// need to upload from RGBA format
|
// need to upload from RGBA format
|
||||||
const PIX pixTextureSize = pixSizeI*pixSizeJ;
|
const PIX pixTextureSize = pixSizeI*pixSizeJ;
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
const UBYTE* src = pubTexture;
|
|
||||||
DWORD* dst = (DWORD*)(pubTexture+pixTextureSize);
|
|
||||||
for (int i=0; i<pixTextureSize; i++) {
|
|
||||||
const DWORD tmp = ((DWORD)*src) | 0xFFFFFF00;
|
|
||||||
*dst = ((tmp << 24) & 0xff000000 ) | ((tmp << 8) & 0x00ff0000 ) |
|
|
||||||
((tmp >> 8) & 0x0000ff00 ) | ((tmp >> 24) & 0x000000ff );
|
|
||||||
src++;
|
|
||||||
dst++;
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
mov esi,D [pubTexture]
|
mov esi,D [pubTexture]
|
||||||
mov edi,D [pubTexture]
|
mov edi,D [pubTexture]
|
||||||
|
@ -95,7 +84,7 @@ pixLoop:
|
||||||
jnz pixLoop
|
jnz pixLoop
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"movl %[pubTexture], %%esi \n\t"
|
"movl %[pubTexture], %%esi \n\t"
|
||||||
"movl %[pixTextureSize], %%ecx \n\t"
|
"movl %[pixTextureSize], %%ecx \n\t"
|
||||||
|
@ -115,10 +104,18 @@ pixLoop:
|
||||||
: "eax", "ecx", "esi", "edi", "cc", "memory"
|
: "eax", "ecx", "esi", "edi", "cc", "memory"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error Write inline ASM for your platform.
|
const UBYTE* src = pubTexture;
|
||||||
|
DWORD* dst = (DWORD*)(pubTexture+pixTextureSize);
|
||||||
|
for (int i=0; i<pixTextureSize; i++) {
|
||||||
|
const DWORD tmp = ((DWORD)*src) | 0xFFFFFF00;
|
||||||
|
*dst = ((tmp << 24) & 0xff000000 ) | ((tmp << 8) & 0x00ff0000 ) |
|
||||||
|
((tmp >> 8) & 0x0000ff00 ) | ((tmp >> 24) & 0x000000ff );
|
||||||
|
src++;
|
||||||
|
dst++;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// determine internal format
|
// determine internal format
|
||||||
extern INDEX gap_bAllowGrayTextures;
|
extern INDEX gap_bAllowGrayTextures;
|
||||||
|
|
|
@ -169,32 +169,7 @@ void UploadTexture_OGL( ULONG *pulTexture, PIX pixSizeU, PIX pixSizeV,
|
||||||
if( pixSizeV==0) pixSizeV=1;
|
if( pixSizeV==0) pixSizeV=1;
|
||||||
pixSize = pixSizeU*pixSizeV;
|
pixSize = pixSizeU*pixSizeV;
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
// Basically average every other pixel...
|
|
||||||
UWORD w = 0;
|
|
||||||
UBYTE *dptr = (UBYTE *) pulDst;
|
|
||||||
UBYTE *sptr = (UBYTE *) pulSrc;
|
|
||||||
#if 0
|
|
||||||
pixSize *= 4;
|
|
||||||
for (PIX i = 0; i < pixSize; i++)
|
|
||||||
{
|
|
||||||
*dptr = (UBYTE) ( (((UWORD) sptr[0]) + ((UWORD) sptr[1])) >> 1 );
|
|
||||||
dptr++;
|
|
||||||
sptr += 2;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
for (PIX i = 0; i < pixSize; i++)
|
|
||||||
{
|
|
||||||
for (PIX j = 0; j < 4; j++)
|
|
||||||
{
|
|
||||||
*dptr = (UBYTE) ( (((UWORD) sptr[0]) + ((UWORD) sptr[4])) >> 1 );
|
|
||||||
dptr++;
|
|
||||||
sptr++;
|
|
||||||
}
|
|
||||||
sptr += 4;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
pxor mm0,mm0
|
pxor mm0,mm0
|
||||||
mov esi,D [pulSrc]
|
mov esi,D [pulSrc]
|
||||||
|
@ -216,7 +191,7 @@ void UploadTexture_OGL( ULONG *pulTexture, PIX pixSizeU, PIX pixSizeV,
|
||||||
emms
|
emms
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"pxor %%mm0,%%mm0 \n\t"
|
"pxor %%mm0,%%mm0 \n\t"
|
||||||
"movl %[pulSrc],%%esi \n\t"
|
"movl %[pulSrc],%%esi \n\t"
|
||||||
|
@ -244,7 +219,30 @@ void UploadTexture_OGL( ULONG *pulTexture, PIX pixSizeU, PIX pixSizeV,
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error Please write inline ASM for your platform.
|
// Basically average every other pixel...
|
||||||
|
UWORD w = 0;
|
||||||
|
UBYTE *dptr = (UBYTE *) pulDst;
|
||||||
|
UBYTE *sptr = (UBYTE *) pulSrc;
|
||||||
|
#if 0
|
||||||
|
pixSize *= 4;
|
||||||
|
for (PIX i = 0; i < pixSize; i++)
|
||||||
|
{
|
||||||
|
*dptr = (UBYTE) ( (((UWORD) sptr[0]) + ((UWORD) sptr[1])) >> 1 );
|
||||||
|
dptr++;
|
||||||
|
sptr += 2;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
for (PIX i = 0; i < pixSize; i++)
|
||||||
|
{
|
||||||
|
for (PIX j = 0; j < 4; j++)
|
||||||
|
{
|
||||||
|
*dptr = (UBYTE) ( (((UWORD) sptr[0]) + ((UWORD) sptr[4])) >> 1 );
|
||||||
|
dptr++;
|
||||||
|
sptr++;
|
||||||
|
}
|
||||||
|
sptr += 4;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// upload mipmap
|
// upload mipmap
|
||||||
|
|
|
@ -209,7 +209,92 @@ static void MakeOneMipmap( ULONG *pulSrcMipmap, ULONG *pulDstMipmap, PIX pixWidt
|
||||||
if( bBilinear) // type of filtering?
|
if( bBilinear) // type of filtering?
|
||||||
{ // BILINEAR
|
{ // BILINEAR
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
|
__asm {
|
||||||
|
pxor mm0,mm0
|
||||||
|
mov ebx,D [pixWidth]
|
||||||
|
mov esi,D [pulSrcMipmap]
|
||||||
|
mov edi,D [pulDstMipmap]
|
||||||
|
mov edx,D [pixHeight]
|
||||||
|
rowLoop:
|
||||||
|
mov ecx,D [pixWidth]
|
||||||
|
pixLoopN:
|
||||||
|
movd mm1,D [esi+ 0] // up-left
|
||||||
|
movd mm2,D [esi+ 4] // up-right
|
||||||
|
movd mm3,D [esi+ ebx*8 +0] // down-left
|
||||||
|
movd mm4,D [esi+ ebx*8 +4] // down-right
|
||||||
|
punpcklbw mm1,mm0
|
||||||
|
punpcklbw mm2,mm0
|
||||||
|
punpcklbw mm3,mm0
|
||||||
|
punpcklbw mm4,mm0
|
||||||
|
paddw mm1,mm2
|
||||||
|
paddw mm1,mm3
|
||||||
|
paddw mm1,mm4
|
||||||
|
paddw mm1,Q [mmRounder]
|
||||||
|
psrlw mm1,2
|
||||||
|
packuswb mm1,mm0
|
||||||
|
movd D [edi],mm1
|
||||||
|
// advance to next pixel
|
||||||
|
add esi,4*2
|
||||||
|
add edi,4
|
||||||
|
dec ecx
|
||||||
|
jnz pixLoopN
|
||||||
|
// advance to next row
|
||||||
|
lea esi,[esi+ ebx*8] // skip one row in source mip-map
|
||||||
|
dec edx
|
||||||
|
jnz rowLoop
|
||||||
|
emms
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
|
__asm__ __volatile__ (
|
||||||
|
"pxor %%mm0, %%mm0 \n\t"
|
||||||
|
"movl %[pulSrcMipmap], %%esi \n\t"
|
||||||
|
"movl %[pulDstMipmap], %%edi \n\t"
|
||||||
|
"movl %[pixHeight], %%edx \n\t"
|
||||||
|
|
||||||
|
"0: \n\t" // rowLoop
|
||||||
|
"movl %[pixWidth], %%ecx \n\t"
|
||||||
|
|
||||||
|
"1: \n\t" // pixLoopN
|
||||||
|
"movd 0(%%esi), %%mm1 \n\t" // up-left
|
||||||
|
"movd 4(%%esi), %%mm2 \n\t" // up-right
|
||||||
|
"movd 0(%%esi, %[pixWidth], 8), %%mm3 \n\t" // down-left
|
||||||
|
"movd 4(%%esi, %[pixWidth], 8), %%mm4 \n\t" // down-right
|
||||||
|
"punpcklbw %%mm0, %%mm1 \n\t"
|
||||||
|
"punpcklbw %%mm0, %%mm2 \n\t"
|
||||||
|
"punpcklbw %%mm0, %%mm3 \n\t"
|
||||||
|
"punpcklbw %%mm0, %%mm4 \n\t"
|
||||||
|
"paddw %%mm2, %%mm1 \n\t"
|
||||||
|
"paddw %%mm3, %%mm1 \n\t"
|
||||||
|
"paddw %%mm4, %%mm1 \n\t"
|
||||||
|
"paddw (" ASMSYM(mmRounder) "), %%mm1 \n\t"
|
||||||
|
"psrlw $2, %%mm1 \n\t"
|
||||||
|
"packuswb %%mm0, %%mm1 \n\t"
|
||||||
|
"movd %%mm1, (%%edi) \n\t"
|
||||||
|
|
||||||
|
// advance to next pixel
|
||||||
|
"addl $8, %%esi \n\t"
|
||||||
|
"addl $4, %%edi \n\t"
|
||||||
|
"decl %%ecx \n\t"
|
||||||
|
"jnz 1b \n\t" // pixLoopN
|
||||||
|
|
||||||
|
// advance to next row
|
||||||
|
// skip one row in source mip-map
|
||||||
|
"leal 0(%%esi, %[pixWidth], 8), %%esi \n\t"
|
||||||
|
"decl %%edx \n\t"
|
||||||
|
"jnz 0b \n\t" // rowLoop
|
||||||
|
"emms \n\t"
|
||||||
|
: // no outputs.
|
||||||
|
: [pixWidth] "r" (pixWidth),
|
||||||
|
[pulSrcMipmap] "g" (pulSrcMipmap),
|
||||||
|
[pulDstMipmap] "g" (pulDstMipmap),
|
||||||
|
[pixHeight] "g" (pixHeight)
|
||||||
|
: FPU_REGS, MMX_REGS, "ecx", "edx", "esi", "edi",
|
||||||
|
"cc", "memory"
|
||||||
|
);
|
||||||
|
|
||||||
|
#else
|
||||||
UBYTE *src = (UBYTE *) pulSrcMipmap;
|
UBYTE *src = (UBYTE *) pulSrcMipmap;
|
||||||
UBYTE *dest = (UBYTE *) pulDstMipmap;
|
UBYTE *dest = (UBYTE *) pulDstMipmap;
|
||||||
for (int i = 0 ; i < pixHeight; i++)
|
for (int i = 0 ; i < pixHeight; i++)
|
||||||
|
@ -260,129 +345,13 @@ static void MakeOneMipmap( ULONG *pulSrcMipmap, ULONG *pulDstMipmap, PIX pixWidt
|
||||||
src += 8*pixWidth;
|
src += 8*pixWidth;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
|
||||||
pxor mm0,mm0
|
|
||||||
mov ebx,D [pixWidth]
|
|
||||||
mov esi,D [pulSrcMipmap]
|
|
||||||
mov edi,D [pulDstMipmap]
|
|
||||||
mov edx,D [pixHeight]
|
|
||||||
rowLoop:
|
|
||||||
mov ecx,D [pixWidth]
|
|
||||||
pixLoopN:
|
|
||||||
movd mm1,D [esi+ 0] // up-left
|
|
||||||
movd mm2,D [esi+ 4] // up-right
|
|
||||||
movd mm3,D [esi+ ebx*8 +0] // down-left
|
|
||||||
movd mm4,D [esi+ ebx*8 +4] // down-right
|
|
||||||
punpcklbw mm1,mm0
|
|
||||||
punpcklbw mm2,mm0
|
|
||||||
punpcklbw mm3,mm0
|
|
||||||
punpcklbw mm4,mm0
|
|
||||||
paddw mm1,mm2
|
|
||||||
paddw mm1,mm3
|
|
||||||
paddw mm1,mm4
|
|
||||||
paddw mm1,Q [mmRounder]
|
|
||||||
psrlw mm1,2
|
|
||||||
packuswb mm1,mm0
|
|
||||||
movd D [edi],mm1
|
|
||||||
// advance to next pixel
|
|
||||||
add esi,4*2
|
|
||||||
add edi,4
|
|
||||||
dec ecx
|
|
||||||
jnz pixLoopN
|
|
||||||
// advance to next row
|
|
||||||
lea esi,[esi+ ebx*8] // skip one row in source mip-map
|
|
||||||
dec edx
|
|
||||||
jnz rowLoop
|
|
||||||
emms
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
|
||||||
__asm__ __volatile__ (
|
|
||||||
"pxor %%mm0, %%mm0 \n\t"
|
|
||||||
"movl %[pulSrcMipmap], %%esi \n\t"
|
|
||||||
"movl %[pulDstMipmap], %%edi \n\t"
|
|
||||||
"movl %[pixHeight], %%edx \n\t"
|
|
||||||
|
|
||||||
"0: \n\t" // rowLoop
|
|
||||||
"movl %[pixWidth], %%ecx \n\t"
|
|
||||||
|
|
||||||
"1: \n\t" // pixLoopN
|
|
||||||
"movd 0(%%esi), %%mm1 \n\t" // up-left
|
|
||||||
"movd 4(%%esi), %%mm2 \n\t" // up-right
|
|
||||||
"movd 0(%%esi, %[pixWidth], 8), %%mm3 \n\t" // down-left
|
|
||||||
"movd 4(%%esi, %[pixWidth], 8), %%mm4 \n\t" // down-right
|
|
||||||
"punpcklbw %%mm0, %%mm1 \n\t"
|
|
||||||
"punpcklbw %%mm0, %%mm2 \n\t"
|
|
||||||
"punpcklbw %%mm0, %%mm3 \n\t"
|
|
||||||
"punpcklbw %%mm0, %%mm4 \n\t"
|
|
||||||
"paddw %%mm2, %%mm1 \n\t"
|
|
||||||
"paddw %%mm3, %%mm1 \n\t"
|
|
||||||
"paddw %%mm4, %%mm1 \n\t"
|
|
||||||
"paddw (" ASMSYM(mmRounder) "), %%mm1 \n\t"
|
|
||||||
"psrlw $2, %%mm1 \n\t"
|
|
||||||
"packuswb %%mm0, %%mm1 \n\t"
|
|
||||||
"movd %%mm1, (%%edi) \n\t"
|
|
||||||
|
|
||||||
// advance to next pixel
|
|
||||||
"addl $8, %%esi \n\t"
|
|
||||||
"addl $4, %%edi \n\t"
|
|
||||||
"decl %%ecx \n\t"
|
|
||||||
"jnz 1b \n\t" // pixLoopN
|
|
||||||
|
|
||||||
// advance to next row
|
|
||||||
// skip one row in source mip-map
|
|
||||||
"leal 0(%%esi, %[pixWidth], 8), %%esi \n\t"
|
|
||||||
"decl %%edx \n\t"
|
|
||||||
"jnz 0b \n\t" // rowLoop
|
|
||||||
"emms \n\t"
|
|
||||||
: // no outputs.
|
|
||||||
: [pixWidth] "r" (pixWidth),
|
|
||||||
[pulSrcMipmap] "g" (pulSrcMipmap),
|
|
||||||
[pulDstMipmap] "g" (pulDstMipmap),
|
|
||||||
[pixHeight] "g" (pixHeight)
|
|
||||||
: FPU_REGS, MMX_REGS, "ecx", "edx", "esi", "edi",
|
|
||||||
"cc", "memory"
|
|
||||||
);
|
|
||||||
|
|
||||||
#else
|
|
||||||
#error Write inline asm for your platform.
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{ // NEAREST-NEIGHBOUR but with border preserving
|
{ // NEAREST-NEIGHBOUR but with border preserving
|
||||||
ULONG ulRowModulo = pixWidth*2 *BYTES_PER_TEXEL;
|
ULONG ulRowModulo = pixWidth*2 *BYTES_PER_TEXEL;
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
|
|
||||||
PIX offset = 0;
|
|
||||||
ulRowModulo /= 4;
|
|
||||||
|
|
||||||
for (int q = 0; q < 2; q++)
|
|
||||||
{
|
|
||||||
for (PIX i = pixHeight / 2; i > 0; i--)
|
|
||||||
{
|
|
||||||
for (PIX j = pixWidth / 2; j > 0; j--)
|
|
||||||
{
|
|
||||||
*pulDstMipmap = *(pulSrcMipmap + offset);
|
|
||||||
pulSrcMipmap += 2;
|
|
||||||
pulDstMipmap++;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (PIX j = pixWidth / 2; j > 0; j--)
|
|
||||||
{
|
|
||||||
*pulDstMipmap = *(pulSrcMipmap + offset + 1);
|
|
||||||
pulSrcMipmap += 2;
|
|
||||||
pulDstMipmap++;
|
|
||||||
}
|
|
||||||
|
|
||||||
pulSrcMipmap += ulRowModulo;
|
|
||||||
}
|
|
||||||
|
|
||||||
offset = pixWidth * 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
xor ebx,ebx
|
xor ebx,ebx
|
||||||
mov esi,D [pulSrcMipmap]
|
mov esi,D [pulSrcMipmap]
|
||||||
|
@ -428,7 +397,7 @@ halfEnd:
|
||||||
fullEnd:
|
fullEnd:
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
ULONG tmp, tmp2;
|
ULONG tmp, tmp2;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"xorl %[xbx], %[xbx] \n\t"
|
"xorl %[xbx], %[xbx] \n\t"
|
||||||
|
@ -493,7 +462,33 @@ fullEnd:
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error Write inline asm for your platform.
|
PIX offset = 0;
|
||||||
|
ulRowModulo /= 4;
|
||||||
|
|
||||||
|
for (int q = 0; q < 2; q++)
|
||||||
|
{
|
||||||
|
for (PIX i = pixHeight / 2; i > 0; i--)
|
||||||
|
{
|
||||||
|
for (PIX j = pixWidth / 2; j > 0; j--)
|
||||||
|
{
|
||||||
|
*pulDstMipmap = *(pulSrcMipmap + offset);
|
||||||
|
pulSrcMipmap += 2;
|
||||||
|
pulDstMipmap++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (PIX j = pixWidth / 2; j > 0; j--)
|
||||||
|
{
|
||||||
|
*pulDstMipmap = *(pulSrcMipmap + offset + 1);
|
||||||
|
pulSrcMipmap += 2;
|
||||||
|
pulDstMipmap++;
|
||||||
|
}
|
||||||
|
|
||||||
|
pulSrcMipmap += ulRowModulo;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = pixWidth * 2;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -649,7 +644,7 @@ __int64 mmShifter = 0;
|
||||||
__int64 mmMask = 0;
|
__int64 mmMask = 0;
|
||||||
ULONG *pulDitherTable;
|
ULONG *pulDitherTable;
|
||||||
|
|
||||||
#ifdef USE_PORTABLE_C
|
#if !(defined __MSVC_INLINE__) && !(defined __GNU_INLINE_X86_32__)
|
||||||
extern const UBYTE *pubClipByte;
|
extern const UBYTE *pubClipByte;
|
||||||
// increment a byte without overflowing it
|
// increment a byte without overflowing it
|
||||||
static inline void IncrementByteWithClip( UBYTE &ub, SLONG slAdd)
|
static inline void IncrementByteWithClip( UBYTE &ub, SLONG slAdd)
|
||||||
|
@ -778,35 +773,7 @@ void DitherBitmap( INDEX iDitherType, ULONG *pulSrc, ULONG *pulDst, PIX pixWidth
|
||||||
// ------------------------------- ordered matrix dithering routine
|
// ------------------------------- ordered matrix dithering routine
|
||||||
|
|
||||||
ditherOrder:
|
ditherOrder:
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
union uConv
|
|
||||||
{
|
|
||||||
ULONG val;
|
|
||||||
DWORD dwords[2];
|
|
||||||
UWORD words[4];
|
|
||||||
WORD iwords[4];
|
|
||||||
UBYTE bytes[8];
|
|
||||||
};
|
|
||||||
for (int i=0; i<pixHeight; i++) {
|
|
||||||
int idx = i&3;
|
|
||||||
uConv dith;
|
|
||||||
dith.val = pulDitherTable[idx];
|
|
||||||
for (int j=0; j<4; j++) { dith.words[j] >>= mmShifter; }
|
|
||||||
dith.val &= mmMask;
|
|
||||||
uConv* src = (uConv*)(pulSrc+i*pixWidth);
|
|
||||||
uConv* dst = (uConv*)(pulDst+i*pixWidth);
|
|
||||||
for (int j=0; j<pixWidth; j+=2) {
|
|
||||||
uConv p=src[0];
|
|
||||||
for (int k=0; k<8; k++) {
|
|
||||||
IncrementByteWithClip(p.bytes[k], dith.bytes[k]);
|
|
||||||
}
|
|
||||||
dst[0] = p;
|
|
||||||
src++;
|
|
||||||
dst++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
mov esi,D [pulSrc]
|
mov esi,D [pulSrc]
|
||||||
mov edi,D [pulDst]
|
mov edi,D [pulDst]
|
||||||
|
@ -852,7 +819,7 @@ nextRowO:
|
||||||
emms;
|
emms;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
ULONG tmp;
|
ULONG tmp;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"movl %[pulSrc], %%esi \n\t"
|
"movl %[pulSrc], %%esi \n\t"
|
||||||
|
@ -912,7 +879,33 @@ nextRowO:
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error Write inline asm for your platform.
|
union uConv
|
||||||
|
{
|
||||||
|
ULONG val;
|
||||||
|
DWORD dwords[2];
|
||||||
|
UWORD words[4];
|
||||||
|
WORD iwords[4];
|
||||||
|
UBYTE bytes[8];
|
||||||
|
};
|
||||||
|
for (int i=0; i<pixHeight; i++) {
|
||||||
|
int idx = i&3;
|
||||||
|
uConv dith;
|
||||||
|
dith.val = pulDitherTable[idx];
|
||||||
|
for (int j=0; j<4; j++) { dith.words[j] >>= mmShifter; }
|
||||||
|
dith.val &= mmMask;
|
||||||
|
uConv* src = (uConv*)(pulSrc+i*pixWidth);
|
||||||
|
uConv* dst = (uConv*)(pulDst+i*pixWidth);
|
||||||
|
for (int j=0; j<pixWidth; j+=2) {
|
||||||
|
uConv p=src[0];
|
||||||
|
for (int k=0; k<8; k++) {
|
||||||
|
IncrementByteWithClip(p.bytes[k], dith.bytes[k]);
|
||||||
|
}
|
||||||
|
dst[0] = p;
|
||||||
|
src++;
|
||||||
|
dst++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
goto theEnd;
|
goto theEnd;
|
||||||
|
@ -924,34 +917,7 @@ ditherError:
|
||||||
if( pulDst!=pulSrc) memcpy( pulDst, pulSrc, pixCanvasWidth*pixCanvasHeight *BYTES_PER_TEXEL);
|
if( pulDst!=pulSrc) memcpy( pulDst, pulSrc, pixCanvasWidth*pixCanvasHeight *BYTES_PER_TEXEL);
|
||||||
// slModulo+=4;
|
// slModulo+=4;
|
||||||
// now, dither destination
|
// now, dither destination
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
#if 1 //SEB doesn't works....
|
|
||||||
for (int i=0; i<pixHeight-1; i++) {
|
|
||||||
int step = (i&1)?-4:+4;
|
|
||||||
const UBYTE ubMask = (mmErrDiffMask&0xff);
|
|
||||||
UBYTE *src = ((UBYTE*)pulDst)+i*pixCanvasWidth*4;
|
|
||||||
if(i&1) src+=pixWidth*4;
|
|
||||||
// left to right or right to left
|
|
||||||
for (int j=0; j<pixWidth-1; j++) {
|
|
||||||
uConv p1, p3, p5, p7;
|
|
||||||
src+=step;
|
|
||||||
for (int k=0; k<4; k++) { p1.words[k] = src[k]&ubMask; }
|
|
||||||
//p1.val &= mmErrDiffMask;
|
|
||||||
for (int k=0; k<4; k++) { p3.words[k] = (p1.words[k]*3)>>4;
|
|
||||||
p5.words[k] = (p1.words[k]*5)>>4;
|
|
||||||
p7.words[k] = (p1.words[k]*7)>>4; }
|
|
||||||
for (int k=0; k<4; k++) { p1.words[k] -= (p3.words[k] + p5.words[k] + p7.words[k]);}
|
|
||||||
for (int k=0; k<4; k++) {
|
|
||||||
IncrementByteWithClip( src[k + step] , p7.words[k]);
|
|
||||||
IncrementByteWithClip( src[pixCanvasWidth*4 -step +k], p5.words[k]);
|
|
||||||
IncrementByteWithClip( src[pixCanvasWidth*4 +0 +k], p3.words[k]);
|
|
||||||
IncrementByteWithClip( src[pixCanvasWidth*4 +step +k], p1.words[k]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
pxor mm0,mm0
|
pxor mm0,mm0
|
||||||
mov esi,D [pulDst]
|
mov esi,D [pulDst]
|
||||||
|
@ -1046,7 +1012,7 @@ allDoneE:
|
||||||
emms;
|
emms;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"pxor %%mm0, %%mm0 \n\t"
|
"pxor %%mm0, %%mm0 \n\t"
|
||||||
"movl %[pulDst], %%esi \n\t"
|
"movl %[pulDst], %%esi \n\t"
|
||||||
|
@ -1157,7 +1123,32 @@ allDoneE:
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error Write inline asm for your platform.
|
#if 1 //SEB doesn't works....
|
||||||
|
for (int i=0; i<pixHeight-1; i++) {
|
||||||
|
int step = (i&1)?-4:+4;
|
||||||
|
const UBYTE ubMask = (mmErrDiffMask&0xff);
|
||||||
|
UBYTE *src = ((UBYTE*)pulDst)+i*pixCanvasWidth*4;
|
||||||
|
if(i&1) src+=pixWidth*4;
|
||||||
|
// left to right or right to left
|
||||||
|
for (int j=0; j<pixWidth-1; j++) {
|
||||||
|
uConv p1, p3, p5, p7;
|
||||||
|
src+=step;
|
||||||
|
for (int k=0; k<4; k++) { p1.words[k] = src[k]&ubMask; }
|
||||||
|
//p1.val &= mmErrDiffMask;
|
||||||
|
for (int k=0; k<4; k++) { p3.words[k] = (p1.words[k]*3)>>4;
|
||||||
|
p5.words[k] = (p1.words[k]*5)>>4;
|
||||||
|
p7.words[k] = (p1.words[k]*7)>>4; }
|
||||||
|
for (int k=0; k<4; k++) { p1.words[k] -= (p3.words[k] + p5.words[k] + p7.words[k]);}
|
||||||
|
for (int k=0; k<4; k++) {
|
||||||
|
IncrementByteWithClip( src[k + step] , p7.words[k]);
|
||||||
|
IncrementByteWithClip( src[pixCanvasWidth*4 -step +k], p5.words[k]);
|
||||||
|
IncrementByteWithClip( src[pixCanvasWidth*4 +0 +k], p3.words[k]);
|
||||||
|
IncrementByteWithClip( src[pixCanvasWidth*4 +step +k], p1.words[k]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
goto theEnd;
|
goto theEnd;
|
||||||
|
@ -1265,7 +1256,7 @@ extern "C" {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef USE_PORTABLE_C
|
#if !(defined USE_MMX_INTRINSICS) && !(defined __MSVC_INLINE__) && !(defined __GNU_INLINE_X86_32__)
|
||||||
typedef SWORD ExtPix[4];
|
typedef SWORD ExtPix[4];
|
||||||
|
|
||||||
static inline void extpix_fromi64(ExtPix &pix, const __int64 i64)
|
static inline void extpix_fromi64(ExtPix &pix, const __int64 i64)
|
||||||
|
@ -1632,265 +1623,6 @@ void FilterBitmap( INDEX iFilter, ULONG *pulSrc, ULONG *pulDst, PIX pixWidth, PI
|
||||||
_mm_empty(); // we're done, clear out the MMX registers!
|
_mm_empty(); // we're done, clear out the MMX registers!
|
||||||
|
|
||||||
|
|
||||||
#elif (defined USE_PORTABLE_C)
|
|
||||||
slModulo1 /= BYTES_PER_TEXEL; // C++ handles incrementing by sizeof type
|
|
||||||
slCanvasWidth /= BYTES_PER_TEXEL; // C++ handles incrementing by sizeof type
|
|
||||||
|
|
||||||
ULONG *src = pulSrc;
|
|
||||||
ULONG *dst = pulDst;
|
|
||||||
ULONG *rowptr = aulRows;
|
|
||||||
|
|
||||||
ExtPix rmm1={0}, rmm2={0}, rmm3={0}, rmm4={0}, rmm5={0}, rmm6={0}, rmm7={0};
|
|
||||||
#define EXTPIXFROMINT64(x) ExtPix r##x; extpix_fromi64(r##x, x);
|
|
||||||
EXTPIXFROMINT64(mmCm);
|
|
||||||
EXTPIXFROMINT64(mmCe);
|
|
||||||
EXTPIXFROMINT64(mmCc);
|
|
||||||
EXTPIXFROMINT64(mmEch);
|
|
||||||
EXTPIXFROMINT64(mmEcl);
|
|
||||||
EXTPIXFROMINT64(mmEe);
|
|
||||||
EXTPIXFROMINT64(mmEm);
|
|
||||||
EXTPIXFROMINT64(mmMm);
|
|
||||||
EXTPIXFROMINT64(mmMe);
|
|
||||||
EXTPIXFROMINT64(mmMc);
|
|
||||||
EXTPIXFROMINT64(mmAdd);
|
|
||||||
EXTPIXFROMINT64(mmInvDiv);
|
|
||||||
#undef EXTPIXFROMINT64
|
|
||||||
|
|
||||||
// ----------------------- process upper left corner
|
|
||||||
extend_pixel(src[0], rmm1);
|
|
||||||
extend_pixel(src[1], rmm2);
|
|
||||||
extend_pixel(src[pixCanvasWidth], rmm3);
|
|
||||||
extend_pixel(src[pixCanvasWidth+1], rmm4);
|
|
||||||
|
|
||||||
extpix_add(rmm2, rmm3);
|
|
||||||
extpix_mul(rmm1, rmmCm);
|
|
||||||
extpix_mul(rmm2, rmmCe);
|
|
||||||
extpix_mul(rmm4, rmmCc);
|
|
||||||
extpix_add(rmm1, rmm2);
|
|
||||||
extpix_add(rmm1, rmm4);
|
|
||||||
extpix_adds(rmm1, rmmAdd);
|
|
||||||
extpix_mulhi(rmm1, rmmInvDiv);
|
|
||||||
*(rowptr++) = unextend_pixel(rmm1);
|
|
||||||
|
|
||||||
src++;
|
|
||||||
|
|
||||||
// ----------------------- process upper edge pixels
|
|
||||||
for (PIX i = pixWidth - 2; i != 0; i--)
|
|
||||||
{
|
|
||||||
extend_pixel(src[-1], rmm1);
|
|
||||||
extend_pixel(src[0], rmm2);
|
|
||||||
extend_pixel(src[1], rmm3);
|
|
||||||
extend_pixel(src[pixCanvasWidth-1], rmm4);
|
|
||||||
extend_pixel(src[pixCanvasWidth], rmm5);
|
|
||||||
extend_pixel(src[pixCanvasWidth+1], rmm6);
|
|
||||||
|
|
||||||
extpix_add(rmm1, rmm3);
|
|
||||||
extpix_add(rmm4, rmm6);
|
|
||||||
extpix_mul(rmm1, rmmEch);
|
|
||||||
extpix_mul(rmm2, rmmEm);
|
|
||||||
extpix_mul(rmm4, rmmEcl);
|
|
||||||
extpix_mul(rmm5, rmmEe);
|
|
||||||
extpix_add(rmm1, rmm2);
|
|
||||||
extpix_add(rmm1, rmm4);
|
|
||||||
extpix_add(rmm1, rmm5);
|
|
||||||
extpix_adds(rmm1, rmmAdd);
|
|
||||||
extpix_mulhi(rmm1, rmmInvDiv);
|
|
||||||
*(rowptr++) = unextend_pixel(rmm1);
|
|
||||||
src++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------- process upper right corner
|
|
||||||
|
|
||||||
extend_pixel(src[-1], rmm1);
|
|
||||||
extend_pixel(src[0], rmm2);
|
|
||||||
extend_pixel(src[pixCanvasWidth-1], rmm3);
|
|
||||||
extend_pixel(src[pixCanvasWidth], rmm4);
|
|
||||||
|
|
||||||
extpix_add(rmm1, rmm4);
|
|
||||||
extpix_mul(rmm1, rmmCe);
|
|
||||||
extpix_mul(rmm2, rmmCm);
|
|
||||||
extpix_mul(rmm3, rmmCc);
|
|
||||||
extpix_add(rmm1, rmm2);
|
|
||||||
extpix_add(rmm1, rmm3);
|
|
||||||
extpix_adds(rmm1, rmmAdd);
|
|
||||||
extpix_mulhi(rmm1, rmmInvDiv);
|
|
||||||
*rowptr = unextend_pixel(rmm1);
|
|
||||||
|
|
||||||
// ----------------------- process bitmap middle pixels
|
|
||||||
|
|
||||||
dst += slCanvasWidth;
|
|
||||||
src += slModulo1;
|
|
||||||
|
|
||||||
// for each row
|
|
||||||
for (size_t i = pixHeight-2; i != 0; i--) // rowLoop
|
|
||||||
{
|
|
||||||
rowptr = aulRows;
|
|
||||||
|
|
||||||
// process left edge pixel
|
|
||||||
extend_pixel(src[-pixCanvasWidth], rmm1);
|
|
||||||
extend_pixel(src[(-pixCanvasWidth)+1], rmm2);
|
|
||||||
extend_pixel(src[0], rmm3);
|
|
||||||
extend_pixel(src[1], rmm4);
|
|
||||||
extend_pixel(src[pixCanvasWidth], rmm5);
|
|
||||||
extend_pixel(src[pixCanvasWidth+1], rmm6);
|
|
||||||
|
|
||||||
extpix_add(rmm1, rmm5);
|
|
||||||
extpix_add(rmm2, rmm6);
|
|
||||||
extpix_mul(rmm1, rmmEch);
|
|
||||||
extpix_mul(rmm2, rmmEcl);
|
|
||||||
extpix_mul(rmm3, rmmEm);
|
|
||||||
extpix_mul(rmm4, rmmEe);
|
|
||||||
extpix_add(rmm1, rmm2);
|
|
||||||
extpix_add(rmm1, rmm3);
|
|
||||||
extpix_add(rmm1, rmm4);
|
|
||||||
extpix_adds(rmm1, rmmAdd);
|
|
||||||
extpix_mulhi(rmm1, rmmInvDiv);
|
|
||||||
dst[-pixCanvasWidth] = *rowptr;
|
|
||||||
*(rowptr++) = unextend_pixel(rmm1);
|
|
||||||
src++;
|
|
||||||
dst++;
|
|
||||||
|
|
||||||
// for each pixel in current row
|
|
||||||
for (size_t j = pixWidth-2; j != 0; j--) // pixLoop
|
|
||||||
{
|
|
||||||
// prepare upper convolution row
|
|
||||||
extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
|
|
||||||
extend_pixel(src[-pixCanvasWidth], rmm2);
|
|
||||||
extend_pixel(src[(-pixCanvasWidth)+1], rmm3);
|
|
||||||
|
|
||||||
// prepare middle convolution row
|
|
||||||
extend_pixel(src[-1], rmm4);
|
|
||||||
extend_pixel(src[0], rmm5);
|
|
||||||
extend_pixel(src[1], rmm6);
|
|
||||||
|
|
||||||
// free some registers
|
|
||||||
extpix_add(rmm1, rmm3);
|
|
||||||
extpix_add(rmm2, rmm4);
|
|
||||||
extpix_mul(rmm5, rmmMm);
|
|
||||||
|
|
||||||
// prepare lower convolution row
|
|
||||||
extend_pixel(src[pixCanvasWidth-1], rmm3);
|
|
||||||
extend_pixel(src[pixCanvasWidth], rmm4);
|
|
||||||
extend_pixel(src[pixCanvasWidth+1], rmm7);
|
|
||||||
|
|
||||||
// calc weightened value
|
|
||||||
extpix_add(rmm2, rmm6);
|
|
||||||
extpix_add(rmm1, rmm3);
|
|
||||||
extpix_add(rmm2, rmm4);
|
|
||||||
extpix_add(rmm1, rmm7);
|
|
||||||
extpix_mul(rmm2, rmmMe);
|
|
||||||
extpix_mul(rmm1, rmmMc);
|
|
||||||
extpix_add(rmm2, rmm5);
|
|
||||||
extpix_add(rmm1, rmm2);
|
|
||||||
|
|
||||||
// calc and store wightened value
|
|
||||||
extpix_adds(rmm1, rmmAdd);
|
|
||||||
extpix_mulhi(rmm1, rmmInvDiv);
|
|
||||||
dst[-pixCanvasWidth] = *rowptr;
|
|
||||||
*(rowptr++) = unextend_pixel(rmm1);
|
|
||||||
|
|
||||||
// advance to next pixel
|
|
||||||
src++;
|
|
||||||
dst++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// process right edge pixel
|
|
||||||
extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
|
|
||||||
extend_pixel(src[-pixCanvasWidth], rmm2);
|
|
||||||
extend_pixel(src[-1], rmm3);
|
|
||||||
extend_pixel(src[0], rmm4);
|
|
||||||
extend_pixel(src[pixCanvasWidth-1], rmm5);
|
|
||||||
extend_pixel(src[pixCanvasWidth], rmm6);
|
|
||||||
|
|
||||||
extpix_add(rmm1, rmm5);
|
|
||||||
extpix_add(rmm2, rmm6);
|
|
||||||
extpix_mul(rmm1, rmmEcl);
|
|
||||||
extpix_mul(rmm2, rmmEch);
|
|
||||||
extpix_mul(rmm3, rmmEe);
|
|
||||||
extpix_mul(rmm4, rmmEm);
|
|
||||||
extpix_add(rmm1, rmm2);
|
|
||||||
extpix_add(rmm1, rmm3);
|
|
||||||
extpix_add(rmm1, rmm4);
|
|
||||||
extpix_adds(rmm1, rmmAdd);
|
|
||||||
extpix_mulhi(rmm1, rmmInvDiv);
|
|
||||||
dst[-pixCanvasWidth] = *rowptr;
|
|
||||||
*rowptr = unextend_pixel(rmm1);
|
|
||||||
|
|
||||||
// advance to next row
|
|
||||||
src += slModulo1;
|
|
||||||
dst += slModulo1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------- process lower left corner
|
|
||||||
rowptr = aulRows;
|
|
||||||
extend_pixel(src[-pixCanvasWidth], rmm1);
|
|
||||||
extend_pixel(src[(-pixCanvasWidth)+1], rmm2);
|
|
||||||
extend_pixel(src[0], rmm3);
|
|
||||||
extend_pixel(src[1], rmm4);
|
|
||||||
|
|
||||||
extpix_add(rmm1, rmm4);
|
|
||||||
extpix_mul(rmm1, rmmCe);
|
|
||||||
extpix_mul(rmm2, rmmCc);
|
|
||||||
extpix_mul(rmm3, rmmCm);
|
|
||||||
extpix_add(rmm1, rmm2);
|
|
||||||
extpix_add(rmm1, rmm3);
|
|
||||||
extpix_adds(rmm1, rmmAdd);
|
|
||||||
extpix_mulhi(rmm1, rmmInvDiv);
|
|
||||||
dst[-pixCanvasWidth] = *rowptr;
|
|
||||||
dst[0] = unextend_pixel(rmm1);
|
|
||||||
|
|
||||||
src++;
|
|
||||||
dst++;
|
|
||||||
rowptr++;
|
|
||||||
|
|
||||||
// ----------------------- process lower edge pixels
|
|
||||||
for (size_t i = pixWidth-2; i != 0; i--) // lowerLoop
|
|
||||||
{
|
|
||||||
// for each pixel
|
|
||||||
extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
|
|
||||||
extend_pixel(src[-pixCanvasWidth], rmm2);
|
|
||||||
extend_pixel(src[(-pixCanvasWidth)+1], rmm3);
|
|
||||||
extend_pixel(src[-1], rmm4);
|
|
||||||
extend_pixel(src[0], rmm5);
|
|
||||||
extend_pixel(src[1], rmm6);
|
|
||||||
|
|
||||||
extpix_add(rmm1, rmm3);
|
|
||||||
extpix_add(rmm4, rmm6);
|
|
||||||
extpix_mul(rmm1, rmmEcl);
|
|
||||||
extpix_mul(rmm2, rmmEe);
|
|
||||||
extpix_mul(rmm4, rmmEch);
|
|
||||||
extpix_mul(rmm5, rmmEm);
|
|
||||||
extpix_add(rmm1, rmm2);
|
|
||||||
extpix_add(rmm1, rmm4);
|
|
||||||
extpix_add(rmm1, rmm5);
|
|
||||||
extpix_adds(rmm1, rmmAdd);
|
|
||||||
extpix_mulhi(rmm1, rmmInvDiv);
|
|
||||||
dst[-pixCanvasWidth] = *rowptr;
|
|
||||||
dst[0] = unextend_pixel(rmm1);
|
|
||||||
|
|
||||||
// advance to next pixel
|
|
||||||
src++;
|
|
||||||
dst++;
|
|
||||||
rowptr++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------- lower right corners
|
|
||||||
extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
|
|
||||||
extend_pixel(src[-pixCanvasWidth], rmm2);
|
|
||||||
extend_pixel(src[-1], rmm3);
|
|
||||||
extend_pixel(src[0], rmm4);
|
|
||||||
|
|
||||||
extpix_add(rmm2, rmm3);
|
|
||||||
extpix_mul(rmm1, rmmCc);
|
|
||||||
extpix_mul(rmm2, rmmCe);
|
|
||||||
extpix_mul(rmm4, rmmCm);
|
|
||||||
extpix_add(rmm1, rmm2);
|
|
||||||
extpix_add(rmm1, rmm4);
|
|
||||||
extpix_adds(rmm1, rmmAdd);
|
|
||||||
extpix_mulhi(rmm1, rmmInvDiv);
|
|
||||||
dst[-pixCanvasWidth] = *rowptr;
|
|
||||||
dst[0] = unextend_pixel(rmm1);
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
#elif (defined __MSVC_INLINE__)
|
||||||
__asm {
|
__asm {
|
||||||
cld
|
cld
|
||||||
|
@ -2204,7 +1936,7 @@ lowerLoop:
|
||||||
emms
|
emms
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
|
|
||||||
FB_pulSrc = pulSrc;
|
FB_pulSrc = pulSrc;
|
||||||
FB_pulDst = pulDst;
|
FB_pulDst = pulDst;
|
||||||
|
@ -2537,7 +2269,264 @@ lowerLoop:
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error Write inline asm for your platform.
|
slModulo1 /= BYTES_PER_TEXEL; // C++ handles incrementing by sizeof type
|
||||||
|
slCanvasWidth /= BYTES_PER_TEXEL; // C++ handles incrementing by sizeof type
|
||||||
|
|
||||||
|
ULONG *src = pulSrc;
|
||||||
|
ULONG *dst = pulDst;
|
||||||
|
ULONG *rowptr = aulRows;
|
||||||
|
|
||||||
|
ExtPix rmm1={0}, rmm2={0}, rmm3={0}, rmm4={0}, rmm5={0}, rmm6={0}, rmm7={0};
|
||||||
|
#define EXTPIXFROMINT64(x) ExtPix r##x; extpix_fromi64(r##x, x);
|
||||||
|
EXTPIXFROMINT64(mmCm);
|
||||||
|
EXTPIXFROMINT64(mmCe);
|
||||||
|
EXTPIXFROMINT64(mmCc);
|
||||||
|
EXTPIXFROMINT64(mmEch);
|
||||||
|
EXTPIXFROMINT64(mmEcl);
|
||||||
|
EXTPIXFROMINT64(mmEe);
|
||||||
|
EXTPIXFROMINT64(mmEm);
|
||||||
|
EXTPIXFROMINT64(mmMm);
|
||||||
|
EXTPIXFROMINT64(mmMe);
|
||||||
|
EXTPIXFROMINT64(mmMc);
|
||||||
|
EXTPIXFROMINT64(mmAdd);
|
||||||
|
EXTPIXFROMINT64(mmInvDiv);
|
||||||
|
#undef EXTPIXFROMINT64
|
||||||
|
|
||||||
|
// ----------------------- process upper left corner
|
||||||
|
extend_pixel(src[0], rmm1);
|
||||||
|
extend_pixel(src[1], rmm2);
|
||||||
|
extend_pixel(src[pixCanvasWidth], rmm3);
|
||||||
|
extend_pixel(src[pixCanvasWidth+1], rmm4);
|
||||||
|
|
||||||
|
extpix_add(rmm2, rmm3);
|
||||||
|
extpix_mul(rmm1, rmmCm);
|
||||||
|
extpix_mul(rmm2, rmmCe);
|
||||||
|
extpix_mul(rmm4, rmmCc);
|
||||||
|
extpix_add(rmm1, rmm2);
|
||||||
|
extpix_add(rmm1, rmm4);
|
||||||
|
extpix_adds(rmm1, rmmAdd);
|
||||||
|
extpix_mulhi(rmm1, rmmInvDiv);
|
||||||
|
*(rowptr++) = unextend_pixel(rmm1);
|
||||||
|
|
||||||
|
src++;
|
||||||
|
|
||||||
|
// ----------------------- process upper edge pixels
|
||||||
|
for (PIX i = pixWidth - 2; i != 0; i--)
|
||||||
|
{
|
||||||
|
extend_pixel(src[-1], rmm1);
|
||||||
|
extend_pixel(src[0], rmm2);
|
||||||
|
extend_pixel(src[1], rmm3);
|
||||||
|
extend_pixel(src[pixCanvasWidth-1], rmm4);
|
||||||
|
extend_pixel(src[pixCanvasWidth], rmm5);
|
||||||
|
extend_pixel(src[pixCanvasWidth+1], rmm6);
|
||||||
|
|
||||||
|
extpix_add(rmm1, rmm3);
|
||||||
|
extpix_add(rmm4, rmm6);
|
||||||
|
extpix_mul(rmm1, rmmEch);
|
||||||
|
extpix_mul(rmm2, rmmEm);
|
||||||
|
extpix_mul(rmm4, rmmEcl);
|
||||||
|
extpix_mul(rmm5, rmmEe);
|
||||||
|
extpix_add(rmm1, rmm2);
|
||||||
|
extpix_add(rmm1, rmm4);
|
||||||
|
extpix_add(rmm1, rmm5);
|
||||||
|
extpix_adds(rmm1, rmmAdd);
|
||||||
|
extpix_mulhi(rmm1, rmmInvDiv);
|
||||||
|
*(rowptr++) = unextend_pixel(rmm1);
|
||||||
|
src++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------- process upper right corner
|
||||||
|
|
||||||
|
extend_pixel(src[-1], rmm1);
|
||||||
|
extend_pixel(src[0], rmm2);
|
||||||
|
extend_pixel(src[pixCanvasWidth-1], rmm3);
|
||||||
|
extend_pixel(src[pixCanvasWidth], rmm4);
|
||||||
|
|
||||||
|
extpix_add(rmm1, rmm4);
|
||||||
|
extpix_mul(rmm1, rmmCe);
|
||||||
|
extpix_mul(rmm2, rmmCm);
|
||||||
|
extpix_mul(rmm3, rmmCc);
|
||||||
|
extpix_add(rmm1, rmm2);
|
||||||
|
extpix_add(rmm1, rmm3);
|
||||||
|
extpix_adds(rmm1, rmmAdd);
|
||||||
|
extpix_mulhi(rmm1, rmmInvDiv);
|
||||||
|
*rowptr = unextend_pixel(rmm1);
|
||||||
|
|
||||||
|
// ----------------------- process bitmap middle pixels
|
||||||
|
|
||||||
|
dst += slCanvasWidth;
|
||||||
|
src += slModulo1;
|
||||||
|
|
||||||
|
// for each row
|
||||||
|
for (size_t i = pixHeight-2; i != 0; i--) // rowLoop
|
||||||
|
{
|
||||||
|
rowptr = aulRows;
|
||||||
|
|
||||||
|
// process left edge pixel
|
||||||
|
extend_pixel(src[-pixCanvasWidth], rmm1);
|
||||||
|
extend_pixel(src[(-pixCanvasWidth)+1], rmm2);
|
||||||
|
extend_pixel(src[0], rmm3);
|
||||||
|
extend_pixel(src[1], rmm4);
|
||||||
|
extend_pixel(src[pixCanvasWidth], rmm5);
|
||||||
|
extend_pixel(src[pixCanvasWidth+1], rmm6);
|
||||||
|
|
||||||
|
extpix_add(rmm1, rmm5);
|
||||||
|
extpix_add(rmm2, rmm6);
|
||||||
|
extpix_mul(rmm1, rmmEch);
|
||||||
|
extpix_mul(rmm2, rmmEcl);
|
||||||
|
extpix_mul(rmm3, rmmEm);
|
||||||
|
extpix_mul(rmm4, rmmEe);
|
||||||
|
extpix_add(rmm1, rmm2);
|
||||||
|
extpix_add(rmm1, rmm3);
|
||||||
|
extpix_add(rmm1, rmm4);
|
||||||
|
extpix_adds(rmm1, rmmAdd);
|
||||||
|
extpix_mulhi(rmm1, rmmInvDiv);
|
||||||
|
dst[-pixCanvasWidth] = *rowptr;
|
||||||
|
*(rowptr++) = unextend_pixel(rmm1);
|
||||||
|
src++;
|
||||||
|
dst++;
|
||||||
|
|
||||||
|
// for each pixel in current row
|
||||||
|
for (size_t j = pixWidth-2; j != 0; j--) // pixLoop
|
||||||
|
{
|
||||||
|
// prepare upper convolution row
|
||||||
|
extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
|
||||||
|
extend_pixel(src[-pixCanvasWidth], rmm2);
|
||||||
|
extend_pixel(src[(-pixCanvasWidth)+1], rmm3);
|
||||||
|
|
||||||
|
// prepare middle convolution row
|
||||||
|
extend_pixel(src[-1], rmm4);
|
||||||
|
extend_pixel(src[0], rmm5);
|
||||||
|
extend_pixel(src[1], rmm6);
|
||||||
|
|
||||||
|
// free some registers
|
||||||
|
extpix_add(rmm1, rmm3);
|
||||||
|
extpix_add(rmm2, rmm4);
|
||||||
|
extpix_mul(rmm5, rmmMm);
|
||||||
|
|
||||||
|
// prepare lower convolution row
|
||||||
|
extend_pixel(src[pixCanvasWidth-1], rmm3);
|
||||||
|
extend_pixel(src[pixCanvasWidth], rmm4);
|
||||||
|
extend_pixel(src[pixCanvasWidth+1], rmm7);
|
||||||
|
|
||||||
|
// calc weightened value
|
||||||
|
extpix_add(rmm2, rmm6);
|
||||||
|
extpix_add(rmm1, rmm3);
|
||||||
|
extpix_add(rmm2, rmm4);
|
||||||
|
extpix_add(rmm1, rmm7);
|
||||||
|
extpix_mul(rmm2, rmmMe);
|
||||||
|
extpix_mul(rmm1, rmmMc);
|
||||||
|
extpix_add(rmm2, rmm5);
|
||||||
|
extpix_add(rmm1, rmm2);
|
||||||
|
|
||||||
|
// calc and store wightened value
|
||||||
|
extpix_adds(rmm1, rmmAdd);
|
||||||
|
extpix_mulhi(rmm1, rmmInvDiv);
|
||||||
|
dst[-pixCanvasWidth] = *rowptr;
|
||||||
|
*(rowptr++) = unextend_pixel(rmm1);
|
||||||
|
|
||||||
|
// advance to next pixel
|
||||||
|
src++;
|
||||||
|
dst++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// process right edge pixel
|
||||||
|
extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
|
||||||
|
extend_pixel(src[-pixCanvasWidth], rmm2);
|
||||||
|
extend_pixel(src[-1], rmm3);
|
||||||
|
extend_pixel(src[0], rmm4);
|
||||||
|
extend_pixel(src[pixCanvasWidth-1], rmm5);
|
||||||
|
extend_pixel(src[pixCanvasWidth], rmm6);
|
||||||
|
|
||||||
|
extpix_add(rmm1, rmm5);
|
||||||
|
extpix_add(rmm2, rmm6);
|
||||||
|
extpix_mul(rmm1, rmmEcl);
|
||||||
|
extpix_mul(rmm2, rmmEch);
|
||||||
|
extpix_mul(rmm3, rmmEe);
|
||||||
|
extpix_mul(rmm4, rmmEm);
|
||||||
|
extpix_add(rmm1, rmm2);
|
||||||
|
extpix_add(rmm1, rmm3);
|
||||||
|
extpix_add(rmm1, rmm4);
|
||||||
|
extpix_adds(rmm1, rmmAdd);
|
||||||
|
extpix_mulhi(rmm1, rmmInvDiv);
|
||||||
|
dst[-pixCanvasWidth] = *rowptr;
|
||||||
|
*rowptr = unextend_pixel(rmm1);
|
||||||
|
|
||||||
|
// advance to next row
|
||||||
|
src += slModulo1;
|
||||||
|
dst += slModulo1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------- process lower left corner
|
||||||
|
rowptr = aulRows;
|
||||||
|
extend_pixel(src[-pixCanvasWidth], rmm1);
|
||||||
|
extend_pixel(src[(-pixCanvasWidth)+1], rmm2);
|
||||||
|
extend_pixel(src[0], rmm3);
|
||||||
|
extend_pixel(src[1], rmm4);
|
||||||
|
|
||||||
|
extpix_add(rmm1, rmm4);
|
||||||
|
extpix_mul(rmm1, rmmCe);
|
||||||
|
extpix_mul(rmm2, rmmCc);
|
||||||
|
extpix_mul(rmm3, rmmCm);
|
||||||
|
extpix_add(rmm1, rmm2);
|
||||||
|
extpix_add(rmm1, rmm3);
|
||||||
|
extpix_adds(rmm1, rmmAdd);
|
||||||
|
extpix_mulhi(rmm1, rmmInvDiv);
|
||||||
|
dst[-pixCanvasWidth] = *rowptr;
|
||||||
|
dst[0] = unextend_pixel(rmm1);
|
||||||
|
|
||||||
|
src++;
|
||||||
|
dst++;
|
||||||
|
rowptr++;
|
||||||
|
|
||||||
|
// ----------------------- process lower edge pixels
|
||||||
|
for (size_t i = pixWidth-2; i != 0; i--) // lowerLoop
|
||||||
|
{
|
||||||
|
// for each pixel
|
||||||
|
extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
|
||||||
|
extend_pixel(src[-pixCanvasWidth], rmm2);
|
||||||
|
extend_pixel(src[(-pixCanvasWidth)+1], rmm3);
|
||||||
|
extend_pixel(src[-1], rmm4);
|
||||||
|
extend_pixel(src[0], rmm5);
|
||||||
|
extend_pixel(src[1], rmm6);
|
||||||
|
|
||||||
|
extpix_add(rmm1, rmm3);
|
||||||
|
extpix_add(rmm4, rmm6);
|
||||||
|
extpix_mul(rmm1, rmmEcl);
|
||||||
|
extpix_mul(rmm2, rmmEe);
|
||||||
|
extpix_mul(rmm4, rmmEch);
|
||||||
|
extpix_mul(rmm5, rmmEm);
|
||||||
|
extpix_add(rmm1, rmm2);
|
||||||
|
extpix_add(rmm1, rmm4);
|
||||||
|
extpix_add(rmm1, rmm5);
|
||||||
|
extpix_adds(rmm1, rmmAdd);
|
||||||
|
extpix_mulhi(rmm1, rmmInvDiv);
|
||||||
|
dst[-pixCanvasWidth] = *rowptr;
|
||||||
|
dst[0] = unextend_pixel(rmm1);
|
||||||
|
|
||||||
|
// advance to next pixel
|
||||||
|
src++;
|
||||||
|
dst++;
|
||||||
|
rowptr++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------- lower right corners
|
||||||
|
extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
|
||||||
|
extend_pixel(src[-pixCanvasWidth], rmm2);
|
||||||
|
extend_pixel(src[-1], rmm3);
|
||||||
|
extend_pixel(src[0], rmm4);
|
||||||
|
|
||||||
|
extpix_add(rmm2, rmm3);
|
||||||
|
extpix_mul(rmm1, rmmCc);
|
||||||
|
extpix_mul(rmm2, rmmCe);
|
||||||
|
extpix_mul(rmm4, rmmCm);
|
||||||
|
extpix_add(rmm1, rmm2);
|
||||||
|
extpix_add(rmm1, rmm4);
|
||||||
|
extpix_adds(rmm1, rmmAdd);
|
||||||
|
extpix_mulhi(rmm1, rmmInvDiv);
|
||||||
|
dst[-pixCanvasWidth] = *rowptr;
|
||||||
|
dst[0] = unextend_pixel(rmm1);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// all done (finally)
|
// all done (finally)
|
||||||
|
|
|
@ -89,20 +89,14 @@ extern void (__stdcall *pglPNTrianglesfATI)( GLenum pname, GLfloat param);
|
||||||
inline void glCOLOR( COLOR col)
|
inline void glCOLOR( COLOR col)
|
||||||
{
|
{
|
||||||
/* rcg10052001 Platform-wrappers. */
|
/* rcg10052001 Platform-wrappers. */
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
col = ( ((col << 24) ) |
|
|
||||||
((col << 8) & 0x00FF0000) |
|
|
||||||
((col >> 8) & 0x0000FF00) |
|
|
||||||
((col >> 24) ) );
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
mov eax,dword ptr [col]
|
mov eax,dword ptr [col]
|
||||||
bswap eax
|
bswap eax
|
||||||
mov dword ptr [col],eax
|
mov dword ptr [col],eax
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"bswapl %%eax \n\t"
|
"bswapl %%eax \n\t"
|
||||||
: "=a" (col)
|
: "=a" (col)
|
||||||
|
@ -110,7 +104,11 @@ inline void glCOLOR( COLOR col)
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error please define for your platform.
|
col = ( ((col << 24) ) |
|
||||||
|
((col << 8) & 0x00FF0000) |
|
||||||
|
((col >> 8) & 0x0000FF00) |
|
||||||
|
((col >> 24) ) );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
pglColor4ubv((GLubyte*)&col);
|
pglColor4ubv((GLubyte*)&col);
|
||||||
|
|
|
@ -32,11 +32,9 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
#define W word ptr
|
#define W word ptr
|
||||||
#define B byte ptr
|
#define B byte ptr
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
#define ASMOPT 0
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
#define ASMOPT 1
|
#define ASMOPT 1
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
#define ASMOPT 1
|
#define ASMOPT 1
|
||||||
#else
|
#else
|
||||||
#define ASMOPT 0
|
#define ASMOPT 0
|
||||||
|
@ -1285,8 +1283,7 @@ static void RenderWater(void)
|
||||||
{ // SUB-SAMPLING
|
{ // SUB-SAMPLING
|
||||||
SLONG slHeightMapStep, slHeightRowStep;
|
SLONG slHeightMapStep, slHeightRowStep;
|
||||||
|
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
#if (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
push ebx
|
push ebx
|
||||||
bsf ecx,D [_pixTexWidth]
|
bsf ecx,D [_pixTexWidth]
|
||||||
|
@ -1357,7 +1354,7 @@ pixLoop:
|
||||||
pop ebx
|
pop ebx
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
// rcg12152001 needed extra registers. :(
|
// rcg12152001 needed extra registers. :(
|
||||||
_slHeightMapStep_renderWater = slHeightMapStep;
|
_slHeightMapStep_renderWater = slHeightMapStep;
|
||||||
_pixBaseWidth_renderWater = pixBaseWidth;
|
_pixBaseWidth_renderWater = pixBaseWidth;
|
||||||
|
@ -1460,10 +1457,6 @@ pixLoop:
|
||||||
"cc", "memory"
|
"cc", "memory"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
|
||||||
#error fill in for your platform.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
PIX pixPos, pixDU, pixDV;
|
PIX pixPos, pixDU, pixDV;
|
||||||
|
@ -1626,7 +1619,7 @@ pixLoop2:
|
||||||
pop ebx
|
pop ebx
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"bsfl %[pixBaseWidth], %%eax \n\t"
|
"bsfl %[pixBaseWidth], %%eax \n\t"
|
||||||
"movl $32, %%edx \n\t"
|
"movl $32, %%edx \n\t"
|
||||||
|
@ -2146,7 +2139,7 @@ pixLoop4:
|
||||||
pop ebx
|
pop ebx
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"bsfl %[pixBaseWidth], %%eax \n\t"
|
"bsfl %[pixBaseWidth], %%eax \n\t"
|
||||||
"movl $32, %%edx \n\t"
|
"movl $32, %%edx \n\t"
|
||||||
|
@ -2976,7 +2969,7 @@ pixDone:
|
||||||
pop ebx
|
pop ebx
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"movl %[slColumnModulo], %%edx \n\t"
|
"movl %[slColumnModulo], %%edx \n\t"
|
||||||
"movl %[slBufferMask], %%ecx \n\t"
|
"movl %[slBufferMask], %%ecx \n\t"
|
||||||
|
@ -3119,7 +3112,7 @@ pixLoopF:
|
||||||
jnz rowLoopF
|
jnz rowLoopF
|
||||||
pop ebx
|
pop ebx
|
||||||
}
|
}
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
_pubHeat_RenderPlasmaFire = pubHeat; // ran out of registers. :/
|
_pubHeat_RenderPlasmaFire = pubHeat; // ran out of registers. :/
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"movl %[slHeatRowStep], %%eax \n\t"
|
"movl %[slHeatRowStep], %%eax \n\t"
|
||||||
|
|
|
@ -40,16 +40,6 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
#define W word ptr
|
#define W word ptr
|
||||||
#define B byte ptr
|
#define B byte ptr
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
|
||||||
#define ASMOPT 0
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
#define ASMOPT 1
|
|
||||||
#elif (defined __GNU_INLINE__)
|
|
||||||
#define ASMOPT 1
|
|
||||||
#else
|
|
||||||
#define ASMOPT 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
extern INDEX shd_bFineQuality;
|
extern INDEX shd_bFineQuality;
|
||||||
extern INDEX shd_iFiltering;
|
extern INDEX shd_iFiltering;
|
||||||
extern INDEX shd_iDithering;
|
extern INDEX shd_iDithering;
|
||||||
|
@ -290,8 +280,7 @@ void CLayerMixer::AddAmbientPoint(void)
|
||||||
_slLightMax<<=7;
|
_slLightMax<<=7;
|
||||||
_slLightStep>>=1;
|
_slLightStep>>=1;
|
||||||
|
|
||||||
#if (ASMOPT == 1)
|
#if (defined __MSVC_INLINE__)
|
||||||
#if (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
// prepare interpolants
|
// prepare interpolants
|
||||||
movd mm0,D [_slL2Row]
|
movd mm0,D [_slL2Row]
|
||||||
|
@ -364,7 +353,7 @@ skipPixel:
|
||||||
emms
|
emms
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
ULONG tmp1, tmp2;
|
ULONG tmp1, tmp2;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
// prepare interpolants
|
// prepare interpolants
|
||||||
|
@ -439,10 +428,6 @@ skipPixel:
|
||||||
: FPU_REGS, MMX_REGS, "eax", "ecx", "edi", "cc", "memory"
|
: FPU_REGS, MMX_REGS, "eax", "ecx", "edi", "cc", "memory"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
|
||||||
#error Write inline asm for your platform.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
// !!! FIXME WARNING: I have not checked this code, and it could be
|
// !!! FIXME WARNING: I have not checked this code, and it could be
|
||||||
|
@ -496,8 +481,7 @@ void CLayerMixer::AddAmbientMaskPoint( UBYTE *pubMask, UBYTE ubMask)
|
||||||
_slLightStep>>=1;
|
_slLightStep>>=1;
|
||||||
|
|
||||||
|
|
||||||
#if (ASMOPT == 1)
|
#if (defined __MSVC_INLINE__)
|
||||||
#if (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
// prepare interpolants
|
// prepare interpolants
|
||||||
movd mm0,D [_slL2Row]
|
movd mm0,D [_slL2Row]
|
||||||
|
@ -576,7 +560,7 @@ skipPixel:
|
||||||
emms
|
emms
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
ULONG tmp1, tmp2;
|
ULONG tmp1, tmp2;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
// prepare interpolants
|
// prepare interpolants
|
||||||
|
@ -660,10 +644,6 @@ skipPixel:
|
||||||
"cc", "memory"
|
"cc", "memory"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
|
||||||
#error Please write inline assembly for your platform.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else // Portable C version...
|
#else // Portable C version...
|
||||||
|
|
||||||
UBYTE* pubLayer = (UBYTE*)_pulLayer;
|
UBYTE* pubLayer = (UBYTE*)_pulLayer;
|
||||||
|
@ -723,8 +703,7 @@ void CLayerMixer::AddDiffusionPoint(void)
|
||||||
_slLightMax<<=7;
|
_slLightMax<<=7;
|
||||||
_slLightStep>>=1;
|
_slLightStep>>=1;
|
||||||
|
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
#if (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
// prepare interpolants
|
// prepare interpolants
|
||||||
movd mm0,D [_slL2Row]
|
movd mm0,D [_slL2Row]
|
||||||
|
@ -796,7 +775,7 @@ skipPixel:
|
||||||
emms
|
emms
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
ULONG tmp1, tmp2;
|
ULONG tmp1, tmp2;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
// prepare interpolants
|
// prepare interpolants
|
||||||
|
@ -871,10 +850,6 @@ skipPixel:
|
||||||
: FPU_REGS, MMX_REGS, "eax", "ecx", "edi", "cc", "memory"
|
: FPU_REGS, MMX_REGS, "eax", "ecx", "edi", "cc", "memory"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
|
||||||
#error Write inline assembly for your platform.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
// for each pixel in the shadow map
|
// for each pixel in the shadow map
|
||||||
UBYTE* pubLayer = (UBYTE*)_pulLayer;
|
UBYTE* pubLayer = (UBYTE*)_pulLayer;
|
||||||
|
@ -929,8 +904,7 @@ void CLayerMixer::AddDiffusionMaskPoint( UBYTE *pubMask, UBYTE ubMask)
|
||||||
_slLightMax<<=7;
|
_slLightMax<<=7;
|
||||||
_slLightStep>>=1;
|
_slLightStep>>=1;
|
||||||
|
|
||||||
#if (ASMOPT == 1)
|
#if (defined __MSVC_INLINE__)
|
||||||
#if (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
// prepare interpolants
|
// prepare interpolants
|
||||||
movd mm0,D [_slL2Row]
|
movd mm0,D [_slL2Row]
|
||||||
|
@ -1008,7 +982,7 @@ skipPixel:
|
||||||
emms
|
emms
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
ULONG tmp1, tmp2;
|
ULONG tmp1, tmp2;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
// prepare interpolants
|
// prepare interpolants
|
||||||
|
@ -1091,11 +1065,6 @@ skipPixel:
|
||||||
"cc", "memory"
|
"cc", "memory"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
|
||||||
#error Write inline ASM for your platform.
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
// for each pixel in the shadow map
|
// for each pixel in the shadow map
|
||||||
|
@ -1201,8 +1170,7 @@ BOOL CLayerMixer::PrepareOneLayerPoint( CBrushShadowLayer *pbsl, BOOL bNoMask)
|
||||||
FLOAT fDL2oDV = fDDL2oDV + 2*(lm_vStepV%v00);
|
FLOAT fDL2oDV = fDDL2oDV + 2*(lm_vStepV%v00);
|
||||||
//_v00 = v00;
|
//_v00 = v00;
|
||||||
|
|
||||||
#if ((ASMOPT == 1) && (!defined __GNU_INLINE__))
|
#if (defined __MSVC_INLINE__)
|
||||||
#if (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
fld D [fDDL2oDU]
|
fld D [fDDL2oDU]
|
||||||
fadd D [fDDL2oDU]
|
fadd D [fDDL2oDU]
|
||||||
|
@ -1230,12 +1198,6 @@ BOOL CLayerMixer::PrepareOneLayerPoint( CBrushShadowLayer *pbsl, BOOL bNoMask)
|
||||||
fistp D [_slDDL2oDV]
|
fistp D [_slDDL2oDV]
|
||||||
fistp D [_slDDL2oDU]
|
fistp D [_slDDL2oDU]
|
||||||
}
|
}
|
||||||
#elif (defined __GNU_INLINE__)
|
|
||||||
STUBBED("inline asm.");
|
|
||||||
#else
|
|
||||||
#error Please write inline assembly for your platform.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
fDDL2oDU *= 2;
|
fDDL2oDU *= 2;
|
||||||
fDDL2oDV *= 2;
|
fDDL2oDV *= 2;
|
||||||
|
@ -1321,8 +1283,7 @@ void CLayerMixer::AddOneLayerGradient( CGradientParameters &gp)
|
||||||
_pulLayer = lm_pulShadowMap;
|
_pulLayer = lm_pulShadowMap;
|
||||||
FLOAT fStart = Clamp( fGr00-(fDGroDJ+fDGroDI)*0.5f, 0.0f, 1.0f);
|
FLOAT fStart = Clamp( fGr00-(fDGroDJ+fDGroDI)*0.5f, 0.0f, 1.0f);
|
||||||
|
|
||||||
#if ((ASMOPT == 1) && (!defined __GNU_INLINE__))
|
#if (defined __MSVC_INLINE__)
|
||||||
#if (defined __MSVC_INLINE__)
|
|
||||||
__int64 mmRowAdv;
|
__int64 mmRowAdv;
|
||||||
SLONG fixGRow = (fGr00-(fDGroDJ+fDGroDI)*0.5f)*32767.0f; // 16:15
|
SLONG fixGRow = (fGr00-(fDGroDJ+fDGroDI)*0.5f)*32767.0f; // 16:15
|
||||||
SLONG slModulo = (lm_pixCanvasSizeU-lm_pixPolygonSizeU) *BYTES_PER_TEXEL;
|
SLONG slModulo = (lm_pixCanvasSizeU-lm_pixPolygonSizeU) *BYTES_PER_TEXEL;
|
||||||
|
@ -1436,14 +1397,6 @@ rowNext:
|
||||||
rowDone:
|
rowDone:
|
||||||
emms
|
emms
|
||||||
}
|
}
|
||||||
#elif (defined __GNU_INLINE__)
|
|
||||||
|
|
||||||
STUBBED("WRITE ME. Argh.");
|
|
||||||
|
|
||||||
#else
|
|
||||||
#error Need inline assembly for your platform.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
// well, make gradient ...
|
// well, make gradient ...
|
||||||
SLONG slR0=0,slG0=0,slB0=0;
|
SLONG slR0=0,slG0=0,slB0=0;
|
||||||
|
@ -1528,9 +1481,8 @@ rowDone:
|
||||||
// apply directional light or ambient to layer
|
// apply directional light or ambient to layer
|
||||||
void CLayerMixer::AddDirectional(void)
|
void CLayerMixer::AddDirectional(void)
|
||||||
{
|
{
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
ULONG ulLight = ByteSwap( lm_colLight);
|
ULONG ulLight = ByteSwap( lm_colLight);
|
||||||
#if (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
// prepare pointers and variables
|
// prepare pointers and variables
|
||||||
mov edi,D [_pulLayer]
|
mov edi,D [_pulLayer]
|
||||||
|
@ -1565,7 +1517,8 @@ rowNext:
|
||||||
emms
|
emms
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
|
ULONG ulLight = ByteSwap( lm_colLight);
|
||||||
ULONG tmp;
|
ULONG tmp;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
// prepare pointers and variables
|
// prepare pointers and variables
|
||||||
|
@ -1608,10 +1561,6 @@ rowNext:
|
||||||
: FPU_REGS, "mm5", "mm6", "ecx", "edi", "cc", "memory"
|
: FPU_REGS, "mm5", "mm6", "ecx", "edi", "cc", "memory"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
|
||||||
#error Write inline assembly for your platform.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
UBYTE* pubLayer = (UBYTE*)_pulLayer;
|
UBYTE* pubLayer = (UBYTE*)_pulLayer;
|
||||||
// for each pixel in the shadow map
|
// for each pixel in the shadow map
|
||||||
|
@ -1631,9 +1580,8 @@ rowNext:
|
||||||
// apply directional light thru mask to layer
|
// apply directional light thru mask to layer
|
||||||
void CLayerMixer::AddMaskDirectional( UBYTE *pubMask, UBYTE ubMask)
|
void CLayerMixer::AddMaskDirectional( UBYTE *pubMask, UBYTE ubMask)
|
||||||
{
|
{
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
ULONG ulLight = ByteSwap( lm_colLight);
|
ULONG ulLight = ByteSwap( lm_colLight);
|
||||||
#if (defined __MSVC_INLINE__)
|
|
||||||
// prepare some local variables
|
// prepare some local variables
|
||||||
__asm {
|
__asm {
|
||||||
// prepare pointers and variables
|
// prepare pointers and variables
|
||||||
|
@ -1665,7 +1613,8 @@ skipLight:
|
||||||
emms
|
emms
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
|
ULONG ulLight = ByteSwap( lm_colLight);
|
||||||
ULONG tmp;
|
ULONG tmp;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
// prepare pointers and variables
|
// prepare pointers and variables
|
||||||
|
@ -1706,10 +1655,6 @@ skipLight:
|
||||||
"cc", "memory"
|
"cc", "memory"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
|
||||||
#error Please write inline assembly for your platform.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
UBYTE* pubLayer = (UBYTE*)_pulLayer;
|
UBYTE* pubLayer = (UBYTE*)_pulLayer;
|
||||||
// for each pixel in the shadow map
|
// for each pixel in the shadow map
|
||||||
|
@ -1832,7 +1777,33 @@ void CLayerMixer::MixOneMipmap(CBrushShadowMap *pbsm, INDEX iMipmap)
|
||||||
}
|
}
|
||||||
} // set initial color
|
} // set initial color
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
|
__asm {
|
||||||
|
cld
|
||||||
|
mov ebx,D [this]
|
||||||
|
mov ecx,D [ebx].lm_pixCanvasSizeU
|
||||||
|
imul ecx,D [ebx].lm_pixCanvasSizeV
|
||||||
|
mov edi,D [ebx].lm_pulShadowMap
|
||||||
|
mov eax,D [colAmbient]
|
||||||
|
bswap eax
|
||||||
|
rep stosd
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
|
ULONG clob1, clob2, clob3;
|
||||||
|
__asm__ __volatile__ (
|
||||||
|
"cld \n\t"
|
||||||
|
"imull %%esi, %%ecx \n\t"
|
||||||
|
"bswapl %%eax \n\t"
|
||||||
|
"rep \n\t"
|
||||||
|
"stosl \n\t"
|
||||||
|
: "=a" (clob1), "=c" (clob2), "=D" (clob3)
|
||||||
|
: "c" (this->lm_pixCanvasSizeU), "S" (this->lm_pixCanvasSizeV),
|
||||||
|
"a" (colAmbient), "D" (this->lm_pulShadowMap)
|
||||||
|
: "cc", "memory"
|
||||||
|
);
|
||||||
|
|
||||||
|
#else
|
||||||
register ULONG count = this->lm_pixCanvasSizeU * this->lm_pixCanvasSizeV;
|
register ULONG count = this->lm_pixCanvasSizeU * this->lm_pixCanvasSizeV;
|
||||||
#if PLATFORM_LITTLEENDIAN
|
#if PLATFORM_LITTLEENDIAN
|
||||||
// Forces C fallback; BYTESWAP itself is a no-op on little endian.
|
// Forces C fallback; BYTESWAP itself is a no-op on little endian.
|
||||||
|
@ -1850,35 +1821,7 @@ void CLayerMixer::MixOneMipmap(CBrushShadowMap *pbsm, INDEX iMipmap)
|
||||||
ptr++;
|
ptr++;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
#endif
|
||||||
__asm {
|
|
||||||
cld
|
|
||||||
mov ebx,D [this]
|
|
||||||
mov ecx,D [ebx].lm_pixCanvasSizeU
|
|
||||||
imul ecx,D [ebx].lm_pixCanvasSizeV
|
|
||||||
mov edi,D [ebx].lm_pulShadowMap
|
|
||||||
mov eax,D [colAmbient]
|
|
||||||
bswap eax
|
|
||||||
rep stosd
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
|
||||||
ULONG clob1, clob2, clob3;
|
|
||||||
__asm__ __volatile__ (
|
|
||||||
"cld \n\t"
|
|
||||||
"imull %%esi, %%ecx \n\t"
|
|
||||||
"bswapl %%eax \n\t"
|
|
||||||
"rep \n\t"
|
|
||||||
"stosl \n\t"
|
|
||||||
: "=a" (clob1), "=c" (clob2), "=D" (clob3)
|
|
||||||
: "c" (this->lm_pixCanvasSizeU), "S" (this->lm_pixCanvasSizeV),
|
|
||||||
"a" (colAmbient), "D" (this->lm_pulShadowMap)
|
|
||||||
: "cc", "memory"
|
|
||||||
);
|
|
||||||
|
|
||||||
#else
|
|
||||||
#error Please write inline assembly for your platform.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
_pfWorldEditingProfile.StopTimer(CWorldEditingProfile::PTI_AMBIENTFILL);
|
_pfWorldEditingProfile.StopTimer(CWorldEditingProfile::PTI_AMBIENTFILL);
|
||||||
|
|
||||||
|
@ -1955,9 +1898,7 @@ void CLayerMixer::MixOneMipmap(CBrushShadowMap *pbsm, INDEX iMipmap)
|
||||||
// copy from static shadow map to dynamic layer
|
// copy from static shadow map to dynamic layer
|
||||||
__forceinline void CLayerMixer::CopyShadowLayer(void)
|
__forceinline void CLayerMixer::CopyShadowLayer(void)
|
||||||
{
|
{
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
memcpy(lm_pulShadowMap, lm_pulStaticShadowMap, lm_pixCanvasSizeU*lm_pixCanvasSizeV*4);
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
cld
|
cld
|
||||||
mov ebx,D [this]
|
mov ebx,D [this]
|
||||||
|
@ -1967,7 +1908,7 @@ __forceinline void CLayerMixer::CopyShadowLayer(void)
|
||||||
mov edi,D [ebx].lm_pulShadowMap
|
mov edi,D [ebx].lm_pulShadowMap
|
||||||
rep movsd
|
rep movsd
|
||||||
}
|
}
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
ULONG clob1, clob2, clob3;
|
ULONG clob1, clob2, clob3;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"cld \n\t"
|
"cld \n\t"
|
||||||
|
@ -1980,21 +1921,16 @@ __forceinline void CLayerMixer::CopyShadowLayer(void)
|
||||||
: "cc", "memory"
|
: "cc", "memory"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error Please write inline assembly for your platform.
|
memcpy(lm_pulShadowMap, lm_pulStaticShadowMap, lm_pixCanvasSizeU*lm_pixCanvasSizeV*4);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// copy from static shadow map to dynamic layer
|
// copy from static shadow map to dynamic layer
|
||||||
__forceinline void CLayerMixer::FillShadowLayer( COLOR col)
|
__forceinline void CLayerMixer::FillShadowLayer( COLOR col)
|
||||||
{
|
{
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
DWORD* dst = (DWORD*)lm_pulShadowMap;
|
|
||||||
int n = lm_pixCanvasSizeU*lm_pixCanvasSizeV;
|
|
||||||
DWORD color = __builtin_bswap32(col);
|
|
||||||
while(n--) {*(dst++)=color;}
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
cld
|
cld
|
||||||
mov ebx,D [this]
|
mov ebx,D [this]
|
||||||
|
@ -2006,7 +1942,7 @@ __forceinline void CLayerMixer::FillShadowLayer( COLOR col)
|
||||||
rep stosd
|
rep stosd
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
ULONG clob1, clob2, clob3;
|
ULONG clob1, clob2, clob3;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"cld \n\t"
|
"cld \n\t"
|
||||||
|
@ -2020,9 +1956,12 @@ __forceinline void CLayerMixer::FillShadowLayer( COLOR col)
|
||||||
: "cc", "memory"
|
: "cc", "memory"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error Please write inline assembly for your platform.
|
DWORD* dst = (DWORD*)lm_pulShadowMap;
|
||||||
#endif
|
int n = lm_pixCanvasSizeU*lm_pixCanvasSizeV;
|
||||||
|
DWORD color = __builtin_bswap32(col);
|
||||||
|
while(n--) {*(dst++)=color;}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -24,20 +24,11 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
#define _PC_64 0x0300
|
#define _PC_64 0x0300
|
||||||
|
|
||||||
// !!! FIXME: I'd like to remove any dependency on the FPU control word from the game, asap. --ryan.
|
// !!! FIXME: I'd like to remove any dependency on the FPU control word from the game, asap. --ryan.
|
||||||
#ifdef USE_PORTABLE_C
|
#if (defined _MSC_VER)
|
||||||
// Fake control87 for USE_PORTABLE_C version
|
|
||||||
inline ULONG _control87(WORD newcw, WORD mask)
|
|
||||||
{
|
|
||||||
static WORD fpw=_PC_64;
|
|
||||||
if (mask != 0)
|
|
||||||
{
|
|
||||||
fpw &= ~mask;
|
|
||||||
fpw |= (newcw & mask);
|
|
||||||
}
|
|
||||||
return(fpw);
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
// _control87 is provided by the compiler
|
||||||
|
|
||||||
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
|
|
||||||
inline ULONG _control87(WORD newcw, WORD mask)
|
inline ULONG _control87(WORD newcw, WORD mask)
|
||||||
{
|
{
|
||||||
|
@ -74,8 +65,20 @@ inline ULONG _control87(WORD newcw, WORD mask)
|
||||||
return(fpw);
|
return(fpw);
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (!defined _MSC_VER)
|
#else
|
||||||
#error Implement for your platform, or add a stub conditional here.
|
|
||||||
|
// Fake control87 for USE_PORTABLE_C version
|
||||||
|
inline ULONG _control87(WORD newcw, WORD mask)
|
||||||
|
{
|
||||||
|
static WORD fpw=_PC_64;
|
||||||
|
if (mask != 0)
|
||||||
|
{
|
||||||
|
fpw &= ~mask;
|
||||||
|
fpw |= (newcw & mask);
|
||||||
|
}
|
||||||
|
return(fpw);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Get current precision setting of FPU. */
|
/* Get current precision setting of FPU. */
|
||||||
|
|
|
@ -312,12 +312,7 @@ inline FLOAT NormByteToFloat( const ULONG ul)
|
||||||
// fast float to int conversion
|
// fast float to int conversion
|
||||||
inline SLONG FloatToInt( FLOAT f)
|
inline SLONG FloatToInt( FLOAT f)
|
||||||
{
|
{
|
||||||
#if defined(__arm__) || defined(USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
// round to nearest by adding/subtracting 0.5 (depending on f pos/neg) before converting to SLONG
|
|
||||||
float addToRound = copysignf(0.5f, f); // copy f's signbit to 0.5 => if f<0 then addToRound = -0.5, else 0.5
|
|
||||||
return((SLONG) (f + addToRound));
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
SLONG slRet;
|
SLONG slRet;
|
||||||
__asm {
|
__asm {
|
||||||
fld D [f]
|
fld D [f]
|
||||||
|
@ -325,7 +320,7 @@ inline SLONG FloatToInt( FLOAT f)
|
||||||
}
|
}
|
||||||
return slRet;
|
return slRet;
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
SLONG slRet;
|
SLONG slRet;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"flds (%%eax) \n\t"
|
"flds (%%eax) \n\t"
|
||||||
|
@ -336,16 +331,16 @@ inline SLONG FloatToInt( FLOAT f)
|
||||||
);
|
);
|
||||||
return(slRet);
|
return(slRet);
|
||||||
#else
|
#else
|
||||||
#error Fill this in for your platform.
|
// round to nearest by adding/subtracting 0.5 (depending on f pos/neg) before converting to SLONG
|
||||||
|
float addToRound = copysignf(0.5f, f); // copy f's signbit to 0.5 => if f<0 then addToRound = -0.5, else 0.5
|
||||||
|
return((SLONG) (f + addToRound));
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// log base 2 of any float numero
|
// log base 2 of any float numero
|
||||||
inline FLOAT Log2( FLOAT f) {
|
inline FLOAT Log2( FLOAT f) {
|
||||||
#if (defined USE_PORTABLE_C) || defined(__arm__)
|
#if (defined __MSVC_INLINE__)
|
||||||
return log2f(f);
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
FLOAT fRet;
|
FLOAT fRet;
|
||||||
_asm {
|
_asm {
|
||||||
fld1
|
fld1
|
||||||
|
@ -355,7 +350,7 @@ inline FLOAT Log2( FLOAT f) {
|
||||||
}
|
}
|
||||||
return fRet;
|
return fRet;
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
FLOAT fRet;
|
FLOAT fRet;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"fld1 \n\t"
|
"fld1 \n\t"
|
||||||
|
@ -368,7 +363,8 @@ inline FLOAT Log2( FLOAT f) {
|
||||||
);
|
);
|
||||||
return(fRet);
|
return(fRet);
|
||||||
#else
|
#else
|
||||||
#error Fill this in for your platform.
|
return log2f(f);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -376,8 +372,24 @@ inline FLOAT Log2( FLOAT f) {
|
||||||
// returns accurate values only for integers that are power of 2
|
// returns accurate values only for integers that are power of 2
|
||||||
inline SLONG FastLog2( SLONG x)
|
inline SLONG FastLog2( SLONG x)
|
||||||
{
|
{
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
#ifdef __GNUC__
|
SLONG slRet;
|
||||||
|
__asm {
|
||||||
|
bsr eax,D [x]
|
||||||
|
mov D [slRet],eax
|
||||||
|
}
|
||||||
|
return slRet;
|
||||||
|
|
||||||
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
|
SLONG slRet;
|
||||||
|
__asm__ __volatile__ (
|
||||||
|
"bsrl %%ecx, %%eax \n\t"
|
||||||
|
: "=a" (slRet)
|
||||||
|
: "c" (x)
|
||||||
|
: "memory"
|
||||||
|
);
|
||||||
|
return(slRet);
|
||||||
|
#elif (defined __GNUC__)
|
||||||
if(x == 0) return 0; // __builtin_clz() is undefined for 0
|
if(x == 0) return 0; // __builtin_clz() is undefined for 0
|
||||||
int numLeadingZeros = __builtin_clz(x);
|
int numLeadingZeros = __builtin_clz(x);
|
||||||
return 31 - numLeadingZeros;
|
return 31 - numLeadingZeros;
|
||||||
|
@ -393,38 +405,13 @@ inline SLONG FastLog2( SLONG x)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
SLONG slRet;
|
|
||||||
__asm {
|
|
||||||
bsr eax,D [x]
|
|
||||||
mov D [slRet],eax
|
|
||||||
}
|
|
||||||
return slRet;
|
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
|
||||||
SLONG slRet;
|
|
||||||
__asm__ __volatile__ (
|
|
||||||
"bsrl %%ecx, %%eax \n\t"
|
|
||||||
: "=a" (slRet)
|
|
||||||
: "c" (x)
|
|
||||||
: "memory"
|
|
||||||
);
|
|
||||||
return(slRet);
|
|
||||||
#else
|
|
||||||
#error Fill this in for your platform.
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* DG: function is unused => doesn't matter that portable implementation is not optimal :)
|
/* DG: function is unused => doesn't matter that portable implementation is not optimal :)
|
||||||
// returns log2 of first larger value that is a power of 2
|
// returns log2 of first larger value that is a power of 2
|
||||||
inline SLONG FastMaxLog2( SLONG x)
|
inline SLONG FastMaxLog2( SLONG x)
|
||||||
{
|
{
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
printf("CHECK THIS: %s:%d\n", __FILE__, __LINE__);
|
|
||||||
return((SLONG) log2((double) x));
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
SLONG slRet;
|
SLONG slRet;
|
||||||
__asm {
|
__asm {
|
||||||
bsr eax,D [x]
|
bsr eax,D [x]
|
||||||
|
@ -435,7 +422,7 @@ printf("CHECK THIS: %s:%d\n", __FILE__, __LINE__);
|
||||||
}
|
}
|
||||||
return slRet;
|
return slRet;
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
SLONG slRet;
|
SLONG slRet;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"bsrl %%ecx, %%eax \n\t"
|
"bsrl %%ecx, %%eax \n\t"
|
||||||
|
@ -448,7 +435,9 @@ printf("CHECK THIS: %s:%d\n", __FILE__, __LINE__);
|
||||||
);
|
);
|
||||||
return(slRet);
|
return(slRet);
|
||||||
#else
|
#else
|
||||||
#error Fill this in for your platform.
|
printf("CHECK THIS: %s:%d\n", __FILE__, __LINE__);
|
||||||
|
return((SLONG) log2((double) x));
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -40,14 +40,6 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
#define W word ptr
|
#define W word ptr
|
||||||
#define B byte ptr
|
#define B byte ptr
|
||||||
|
|
||||||
#if (defined __MSVC_INLINE__)
|
|
||||||
#define ASMOPT 1
|
|
||||||
#elif (defined __GNU_INLINE__)
|
|
||||||
#define ASMOPT 0 // !!! FIXME: rcg10112001 Write GCC inline asm versions...
|
|
||||||
#else
|
|
||||||
#define ASMOPT 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
extern BOOL CVA_bModels;
|
extern BOOL CVA_bModels;
|
||||||
extern BOOL GFX_bTruform;
|
extern BOOL GFX_bTruform;
|
||||||
|
@ -663,7 +655,7 @@ static FLOAT _fHazeAdd;
|
||||||
// check vertex against fog
|
// check vertex against fog
|
||||||
static void GetFogMapInVertex( GFXVertex3 &vtx, GFXTexCoord &tex)
|
static void GetFogMapInVertex( GFXVertex3 &vtx, GFXTexCoord &tex)
|
||||||
{
|
{
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
__asm {
|
__asm {
|
||||||
mov esi,D [vtx]
|
mov esi,D [vtx]
|
||||||
mov edi,D [tex]
|
mov edi,D [tex]
|
||||||
|
@ -708,7 +700,7 @@ static void GetFogMapInVertex( GFXVertex3 &vtx, GFXTexCoord &tex)
|
||||||
// check vertex against haze
|
// check vertex against haze
|
||||||
static void GetHazeMapInVertex( GFXVertex3 &vtx, FLOAT &tx1)
|
static void GetHazeMapInVertex( GFXVertex3 &vtx, FLOAT &tx1)
|
||||||
{
|
{
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
__asm {
|
__asm {
|
||||||
mov esi,D [vtx]
|
mov esi,D [vtx]
|
||||||
mov edi,D [tx1]
|
mov edi,D [tx1]
|
||||||
|
@ -1080,7 +1072,7 @@ static void UnpackFrame( CRenderModel &rm, BOOL bKeepNormals)
|
||||||
const ModelFrameVertex16 *pFrame1 = rm.rm_pFrame16_1;
|
const ModelFrameVertex16 *pFrame1 = rm.rm_pFrame16_1;
|
||||||
if( pFrame0==pFrame1)
|
if( pFrame0==pFrame1)
|
||||||
{
|
{
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
// for each vertex in mip
|
// for each vertex in mip
|
||||||
const SLONG fixLerpRatio = FloatToInt(fLerpRatio*256.0f); // fix 8:8
|
const SLONG fixLerpRatio = FloatToInt(fLerpRatio*256.0f); // fix 8:8
|
||||||
SLONG slTmp1, slTmp2, slTmp3;
|
SLONG slTmp1, slTmp2, slTmp3;
|
||||||
|
@ -1196,7 +1188,7 @@ vtxNext16:
|
||||||
// if lerping
|
// if lerping
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
// for each vertex in mip
|
// for each vertex in mip
|
||||||
const SLONG fixLerpRatio = FloatToInt(fLerpRatio*256.0f); // fix 8:8
|
const SLONG fixLerpRatio = FloatToInt(fLerpRatio*256.0f); // fix 8:8
|
||||||
SLONG slTmp1, slTmp2, slTmp3;
|
SLONG slTmp1, slTmp2, slTmp3;
|
||||||
|
@ -1365,7 +1357,7 @@ vtxNext16L:
|
||||||
// if no lerping
|
// if no lerping
|
||||||
if( pFrame0==pFrame1)
|
if( pFrame0==pFrame1)
|
||||||
{
|
{
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
// for each vertex in mip
|
// for each vertex in mip
|
||||||
const SLONG fixLerpRatio = FloatToInt(fLerpRatio*256.0f); // fix 8:8
|
const SLONG fixLerpRatio = FloatToInt(fLerpRatio*256.0f); // fix 8:8
|
||||||
SLONG slTmp1, slTmp2, slTmp3;
|
SLONG slTmp1, slTmp2, slTmp3;
|
||||||
|
@ -1464,7 +1456,7 @@ vtxNext8:
|
||||||
// if lerping
|
// if lerping
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
const SLONG fixLerpRatio = FloatToInt(fLerpRatio*256.0f); // fix 8:8
|
const SLONG fixLerpRatio = FloatToInt(fLerpRatio*256.0f); // fix 8:8
|
||||||
SLONG slTmp1, slTmp2, slTmp3;
|
SLONG slTmp1, slTmp2, slTmp3;
|
||||||
// re-adjust stretching factors because of fixint lerping (divide by 256)
|
// re-adjust stretching factors because of fixint lerping (divide by 256)
|
||||||
|
@ -1610,7 +1602,7 @@ vtxNext8L:
|
||||||
}
|
}
|
||||||
|
|
||||||
// generate colors from shades
|
// generate colors from shades
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
__asm {
|
__asm {
|
||||||
pxor mm0,mm0
|
pxor mm0,mm0
|
||||||
// construct 64-bit RGBA light
|
// construct 64-bit RGBA light
|
||||||
|
@ -1974,7 +1966,7 @@ void CModelObject::RenderModel_View( CRenderModel &rm)
|
||||||
pvtxSrfBase = &_avtxSrfBase[iSrfVx0];
|
pvtxSrfBase = &_avtxSrfBase[iSrfVx0];
|
||||||
INDEX iSrfVx;
|
INDEX iSrfVx;
|
||||||
|
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
__asm {
|
__asm {
|
||||||
push ebx
|
push ebx
|
||||||
mov ebx,D [puwSrfToMip]
|
mov ebx,D [puwSrfToMip]
|
||||||
|
@ -2074,7 +2066,7 @@ srfVtxLoop:
|
||||||
const COLOR colD = AdjustColor( ms.ms_colDiffuse, _slTexHueShift, _slTexSaturation);
|
const COLOR colD = AdjustColor( ms.ms_colDiffuse, _slTexHueShift, _slTexSaturation);
|
||||||
colSrfDiff.MultiplyRGBA( colD, colMdlDiff);
|
colSrfDiff.MultiplyRGBA( colD, colMdlDiff);
|
||||||
|
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
// setup texcoord array
|
// setup texcoord array
|
||||||
__asm {
|
__asm {
|
||||||
push ebx
|
push ebx
|
||||||
|
@ -2134,7 +2126,7 @@ vtxEnd:
|
||||||
for( INDEX iSrfVx=0; iSrfVx<ctSrfVx; iSrfVx++) pcolSrfBase[iSrfVx] = colSrfDiffAdj;
|
for( INDEX iSrfVx=0; iSrfVx<ctSrfVx; iSrfVx++) pcolSrfBase[iSrfVx] = colSrfDiffAdj;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
// setup color array
|
// setup color array
|
||||||
const COLOR colS = colSrfDiff.ul.abgr;
|
const COLOR colS = colSrfDiff.ul.abgr;
|
||||||
__asm {
|
__asm {
|
||||||
|
@ -2335,7 +2327,7 @@ diffColLoop:
|
||||||
// cache rotation
|
// cache rotation
|
||||||
const FLOATmatrix3D &m = rm.rm_mObjectRotation;
|
const FLOATmatrix3D &m = rm.rm_mObjectRotation;
|
||||||
|
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
__asm {
|
__asm {
|
||||||
push ebx
|
push ebx
|
||||||
mov ebx,D [m]
|
mov ebx,D [m]
|
||||||
|
@ -2530,7 +2522,7 @@ reflMipLoop:
|
||||||
// cache object view rotation
|
// cache object view rotation
|
||||||
const FLOATmatrix3D &m = rm.rm_mObjectToView;
|
const FLOATmatrix3D &m = rm.rm_mObjectToView;
|
||||||
|
|
||||||
#if ASMOPT == 1
|
#if (defined __MSVC_INLINE__)
|
||||||
__asm {
|
__asm {
|
||||||
push ebx
|
push ebx
|
||||||
mov ebx,D [m]
|
mov ebx,D [m]
|
||||||
|
|
|
@ -105,10 +105,7 @@ static SLONG slTmp;
|
||||||
|
|
||||||
static inline PIX PIXCoord(FLOAT f) // (f+0.9999f) or (ceil(f))
|
static inline PIX PIXCoord(FLOAT f) // (f+0.9999f) or (ceil(f))
|
||||||
{
|
{
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
return((PIX) (f+0.9999f));
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
PIX pixRet;
|
PIX pixRet;
|
||||||
__asm {
|
__asm {
|
||||||
fld dword ptr [f]
|
fld dword ptr [f]
|
||||||
|
@ -123,7 +120,7 @@ static inline PIX PIXCoord(FLOAT f) // (f+0.9999f) or (ceil(f))
|
||||||
}
|
}
|
||||||
return pixRet;
|
return pixRet;
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
PIX pixRet;
|
PIX pixRet;
|
||||||
SLONG clobber;
|
SLONG clobber;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
|
@ -142,7 +139,8 @@ static inline PIX PIXCoord(FLOAT f) // (f+0.9999f) or (ceil(f))
|
||||||
return pixRet;
|
return pixRet;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error Please write inline ASM for your platform.
|
return((PIX) (f+0.9999f));
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,17 +43,15 @@ static CSoundData *psd;
|
||||||
|
|
||||||
// nasm on MacOS X is getting wrong addresses of external globals, so I have
|
// nasm on MacOS X is getting wrong addresses of external globals, so I have
|
||||||
// to define them in the .asm file...lame.
|
// to define them in the .asm file...lame.
|
||||||
#ifdef __GNU_INLINE__
|
#if (defined __GNU_INLINE_X86_32__) && (defined USE_I386_NASM_ASM)
|
||||||
#ifdef USE_PORTABLE_C
|
|
||||||
#define INASM
|
|
||||||
#else
|
|
||||||
#define INASM extern
|
#define INASM extern
|
||||||
#endif
|
#elif (defined __MSVC_INLINE__)
|
||||||
#else
|
|
||||||
#define INASM static
|
#define INASM static
|
||||||
static __int64 mmInvFactor = 0x00007FFF00007FFF;
|
static __int64 mmInvFactor = 0x00007FFF00007FFF;
|
||||||
static FLOAT f65536 = 65536.0f;
|
static FLOAT f65536 = 65536.0f;
|
||||||
static FLOAT f4G = 4294967296.0f;
|
static FLOAT f4G = 4294967296.0f;
|
||||||
|
#else
|
||||||
|
#define INASM static
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
INASM SLONG slMixerBufferSize; // size in samples per channel of the destination buffers
|
INASM SLONG slMixerBufferSize; // size in samples per channel of the destination buffers
|
||||||
|
@ -81,11 +79,7 @@ void ResetMixer( const SLONG *pslBuffer, const SLONG slBufferSize)
|
||||||
slMixerBufferSampleRate = _pSound->sl_SwfeFormat.nSamplesPerSec;
|
slMixerBufferSampleRate = _pSound->sl_SwfeFormat.nSamplesPerSec;
|
||||||
|
|
||||||
// wipe destination mixer buffer
|
// wipe destination mixer buffer
|
||||||
// (Mac OS X uses this path because Apple's memset() is customized for each CPU they support and way faster than this inline asm. --ryan.)
|
#if (defined __MSVC_INLINE__)
|
||||||
#if ((defined USE_PORTABLE_C) || (PLATFORM_MACOSX))
|
|
||||||
memset(pvMixerBuffer, 0, slMixerBufferSize * 8);
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
cld
|
cld
|
||||||
xor eax,eax
|
xor eax,eax
|
||||||
|
@ -94,19 +88,8 @@ void ResetMixer( const SLONG *pslBuffer, const SLONG slBufferSize)
|
||||||
shl ecx,1 // *2 because of 32-bit src format
|
shl ecx,1 // *2 because of 32-bit src format
|
||||||
rep stosd
|
rep stosd
|
||||||
}
|
}
|
||||||
#elif (defined __GNU_INLINE__)
|
|
||||||
// !!! FIXME : rcg12172001 Is this REALLY any faster than memset()?
|
|
||||||
ULONG clob1, clob2;
|
|
||||||
__asm__ __volatile__ (
|
|
||||||
"cld \n\t"
|
|
||||||
"rep \n\t"
|
|
||||||
"stosl \n\t"
|
|
||||||
: "=D" (clob1), "=c" (clob2)
|
|
||||||
: "a" (0), "D" (pvMixerBuffer), "c" (slMixerBufferSize*2)
|
|
||||||
: "cc", "memory"
|
|
||||||
);
|
|
||||||
#else
|
#else
|
||||||
#error please write inline asm for your platform.
|
memset(pvMixerBuffer, 0, slMixerBufferSize * 8);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -118,10 +101,7 @@ void CopyMixerBuffer_stereo( const SLONG slSrcOffset, void *pDstBuffer, const SL
|
||||||
ASSERT( slBytes%4==0);
|
ASSERT( slBytes%4==0);
|
||||||
if( slBytes<4) return;
|
if( slBytes<4) return;
|
||||||
|
|
||||||
#if ((defined USE_PORTABLE_C) || (PLATFORM_MACOSX))
|
#if (defined __MSVC_INLINE__)
|
||||||
// (Mac OS X uses this path because Apple's memset() is customized for each CPU they support and way faster than this inline asm. --ryan.)
|
|
||||||
memcpy(pDstBuffer, ((const char *)pvMixerBuffer) + slSrcOffset, slBytes);
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
cld
|
cld
|
||||||
mov esi,D [slSrcOffset]
|
mov esi,D [slSrcOffset]
|
||||||
|
@ -131,21 +111,8 @@ void CopyMixerBuffer_stereo( const SLONG slSrcOffset, void *pDstBuffer, const SL
|
||||||
shr ecx,2 // bytes to samples per channel
|
shr ecx,2 // bytes to samples per channel
|
||||||
rep movsd
|
rep movsd
|
||||||
}
|
}
|
||||||
#elif (defined __GNU_INLINE__)
|
|
||||||
// !!! FIXME : rcg12172001 Is this REALLY any faster than memcpy()?
|
|
||||||
ULONG clob1, clob2, clob3;
|
|
||||||
__asm__ __volatile__ (
|
|
||||||
"cld \n\t"
|
|
||||||
"rep \n\t"
|
|
||||||
"movsl \n\t"
|
|
||||||
: "=S" (clob1), "=D" (clob2), "=c" (clob3)
|
|
||||||
: "S" (((char *)pvMixerBuffer) + slSrcOffset),
|
|
||||||
"D" (pDstBuffer),
|
|
||||||
"c" (slBytes >> 2)
|
|
||||||
: "cc", "memory"
|
|
||||||
);
|
|
||||||
#else
|
#else
|
||||||
#error please write inline asm for your platform.
|
memcpy(pDstBuffer, ((const char *)pvMixerBuffer) + slSrcOffset, slBytes);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,18 +124,7 @@ void CopyMixerBuffer_mono( const SLONG slSrcOffset, void *pDstBuffer, const SLON
|
||||||
ASSERT( slBytes%2==0);
|
ASSERT( slBytes%2==0);
|
||||||
if( slBytes<4) return;
|
if( slBytes<4) return;
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
// (This is untested, currently. --ryan.)
|
|
||||||
WORD *dest = (WORD *) pDstBuffer;
|
|
||||||
WORD *src = (WORD *) ( ((char *) pvMixerBuffer) + slSrcOffset );
|
|
||||||
SLONG max = slBytes / 4;
|
|
||||||
for (SLONG i = 0; i < max; i++) {
|
|
||||||
*dest = *src;
|
|
||||||
dest++; // move 16 bits.
|
|
||||||
src+=2; // move 32 bits.
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
mov esi,D [slSrcOffset]
|
mov esi,D [slSrcOffset]
|
||||||
add esi,D [pvMixerBuffer]
|
add esi,D [pvMixerBuffer]
|
||||||
|
@ -184,7 +140,7 @@ copyLoop:
|
||||||
jnz copyLoop
|
jnz copyLoop
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"movl %[pvMixerBuffer], %%esi \n\t"
|
"movl %[pvMixerBuffer], %%esi \n\t"
|
||||||
"movl %[pDstBuffer], %%edi \n\t"
|
"movl %[pDstBuffer], %%edi \n\t"
|
||||||
|
@ -204,7 +160,15 @@ copyLoop:
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error please write inline asm for your platform.
|
// (This is untested, currently. --ryan.)
|
||||||
|
WORD *dest = (WORD *) pDstBuffer;
|
||||||
|
WORD *src = (WORD *) ( ((char *) pvMixerBuffer) + slSrcOffset );
|
||||||
|
SLONG max = slBytes / 4;
|
||||||
|
for (SLONG i = 0; i < max; i++) {
|
||||||
|
*dest = *src;
|
||||||
|
dest++; // move 16 bits.
|
||||||
|
src+=2; // move 32 bits.
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -215,24 +179,7 @@ static void ConvertMixerBuffer( const SLONG slBytes)
|
||||||
ASSERT( slBytes%4==0);
|
ASSERT( slBytes%4==0);
|
||||||
if( slBytes<4) return;
|
if( slBytes<4) return;
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
//STUBBED("ConvertMixerBuffer");
|
|
||||||
SWORD *dest = (SWORD *) pvMixerBuffer;
|
|
||||||
SLONG *src = (SLONG *) pvMixerBuffer;
|
|
||||||
SLONG max = slBytes / 2;
|
|
||||||
int tmp;
|
|
||||||
for (SLONG i = 0; i < max; i++) {
|
|
||||||
tmp = *src;
|
|
||||||
if (tmp>32767) tmp=32767;
|
|
||||||
if (tmp<-32767) tmp=-32767;
|
|
||||||
*dest=tmp;
|
|
||||||
dest++; // move 16 bits.
|
|
||||||
src++; // move 32 bits.
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
cld
|
cld
|
||||||
mov esi,D [pvMixerBuffer]
|
mov esi,D [pvMixerBuffer]
|
||||||
|
@ -250,7 +197,7 @@ copyLoop:
|
||||||
emms
|
emms
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__)
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"movl %[pvMixerBuffer], %%esi \n\t"
|
"movl %[pvMixerBuffer], %%esi \n\t"
|
||||||
"movl %[pvMixerBuffer], %%edi \n\t"
|
"movl %[pvMixerBuffer], %%edi \n\t"
|
||||||
|
@ -271,7 +218,20 @@ copyLoop:
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error please write inline asm for your platform.
|
|
||||||
|
SWORD *dest = (SWORD *) pvMixerBuffer;
|
||||||
|
SLONG *src = (SLONG *) pvMixerBuffer;
|
||||||
|
SLONG max = slBytes / 2;
|
||||||
|
int tmp;
|
||||||
|
for (SLONG i = 0; i < max; i++) {
|
||||||
|
tmp = *src;
|
||||||
|
if (tmp>32767) tmp=32767;
|
||||||
|
if (tmp<-32767) tmp=-32767;
|
||||||
|
*dest=tmp;
|
||||||
|
dest++; // move 16 bits.
|
||||||
|
src++; // move 32 bits.
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -323,7 +283,7 @@ void NormalizeMixerBuffer( const FLOAT fNormStrength, const SLONG slBytes, FLOAT
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef __GNU_INLINE__
|
#if (defined __GNU_INLINE_X86_32__) && (defined USE_I386_NASM_ASM)
|
||||||
// These are implemented in an external NASM file.
|
// These are implemented in an external NASM file.
|
||||||
extern "C" {
|
extern "C" {
|
||||||
void MixStereo_asm(CSoundObject *pso);
|
void MixStereo_asm(CSoundObject *pso);
|
||||||
|
@ -337,85 +297,7 @@ inline void MixMono( CSoundObject *pso)
|
||||||
{
|
{
|
||||||
_pfSoundProfile.StartTimer(CSoundProfile::PTI_RAWMIXER);
|
_pfSoundProfile.StartTimer(CSoundProfile::PTI_RAWMIXER);
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
#if (defined __MSVC_INLINE__)
|
||||||
// initialize some local vars
|
|
||||||
SLONG slLeftSample, slRightSample, slNextSample;
|
|
||||||
SLONG *pslDstBuffer = (SLONG*)pvMixerBuffer;
|
|
||||||
fixLeftOfs = (__int64)(fLeftOfs * 65536.0);
|
|
||||||
fixRightOfs = (__int64)(fRightOfs * 65536.0);
|
|
||||||
__int64 fixLeftStep = (__int64)(fLeftStep * 65536.0);
|
|
||||||
__int64 fixRightStep = (__int64)(fRightStep * 65536.0);
|
|
||||||
__int64 fixSoundBufferSize = ((__int64)slSoundBufferSize)<<16;
|
|
||||||
mmSurroundFactor = (__int64)(SWORD)mmSurroundFactor;
|
|
||||||
|
|
||||||
SLONG slLeftVolume_ = slLeftVolume >> 16;
|
|
||||||
SLONG slRightVolume_ = slRightVolume >> 16;
|
|
||||||
|
|
||||||
// loop thru source buffer
|
|
||||||
INDEX iCt = slMixerBufferSize;
|
|
||||||
FOREVER
|
|
||||||
{
|
|
||||||
// if left channel source sample came to end of sample buffer
|
|
||||||
if( fixLeftOfs >= fixSoundBufferSize) {
|
|
||||||
fixLeftOfs -= fixSoundBufferSize;
|
|
||||||
// if has no loop, end it
|
|
||||||
bEndOfSound = bNotLoop;
|
|
||||||
}
|
|
||||||
// if right channel source sample came to end of sample buffer
|
|
||||||
if( fixRightOfs >= fixSoundBufferSize) {
|
|
||||||
fixRightOfs -= fixSoundBufferSize;
|
|
||||||
// if has no loop, end it
|
|
||||||
bEndOfSound = bNotLoop;
|
|
||||||
}
|
|
||||||
// end of buffer?
|
|
||||||
if( iCt<=0 || bEndOfSound) break;
|
|
||||||
|
|
||||||
// fetch one lineary interpolated sample on left channel
|
|
||||||
slLeftSample = pswSrcBuffer[(fixLeftOfs>>16)+0];
|
|
||||||
slNextSample = pswSrcBuffer[(fixLeftOfs>>16)+1];
|
|
||||||
slLeftSample = (slLeftSample*(65535-(fixLeftOfs&65535)) + slNextSample*(fixLeftOfs&65535)) >>16;
|
|
||||||
// fetch one lineary interpolated sample on right channel
|
|
||||||
slRightSample = pswSrcBuffer[(fixRightOfs>>16)+0];
|
|
||||||
slNextSample = pswSrcBuffer[(fixRightOfs>>16)+1];
|
|
||||||
slRightSample = (slRightSample*(65535-(fixRightOfs&65535)) + slNextSample*(fixRightOfs&65535)) >>16;
|
|
||||||
|
|
||||||
// filter samples
|
|
||||||
slLastLeftSample += ((slLeftSample -slLastLeftSample) *slLeftFilter) >>15;
|
|
||||||
slLastRightSample += ((slRightSample-slLastRightSample)*slRightFilter)>>15;
|
|
||||||
|
|
||||||
// apply stereo volume to current sample
|
|
||||||
slLeftSample = (slLastLeftSample * slLeftVolume_) >>15;
|
|
||||||
slRightSample = (slLastRightSample * slRightVolume_)>>15;
|
|
||||||
|
|
||||||
slLeftSample ^= (SLONG)((mmSurroundFactor>> 0)&0xFFFFFFFF);
|
|
||||||
slRightSample ^= (SLONG)((mmSurroundFactor>>32)&0xFFFFFFFF);
|
|
||||||
|
|
||||||
// mix in current sample
|
|
||||||
slLeftSample += pslDstBuffer[0];
|
|
||||||
slRightSample += pslDstBuffer[1];
|
|
||||||
// upper clamp
|
|
||||||
if( slLeftSample > MAX_SWORD) slLeftSample = MAX_SWORD;
|
|
||||||
if( slRightSample > MAX_SWORD) slRightSample = MAX_SWORD;
|
|
||||||
// lower clamp
|
|
||||||
if( slLeftSample < MIN_SWORD) slLeftSample = MIN_SWORD;
|
|
||||||
if( slRightSample < MIN_SWORD) slRightSample = MIN_SWORD;
|
|
||||||
|
|
||||||
// store samples (both channels)
|
|
||||||
pslDstBuffer[0] = slLeftSample;
|
|
||||||
pslDstBuffer[1] = slRightSample;
|
|
||||||
|
|
||||||
// modify volume `
|
|
||||||
slLeftVolume += (SWORD)((mmVolumeGain>> 0)&0xFFFF);
|
|
||||||
slRightVolume += (SWORD)((mmVolumeGain>>16)&0xFFFF);
|
|
||||||
|
|
||||||
// advance to next sample
|
|
||||||
fixLeftOfs += fixLeftStep;
|
|
||||||
fixRightOfs += fixRightStep;
|
|
||||||
pslDstBuffer += 2;
|
|
||||||
iCt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
|
||||||
__asm {
|
__asm {
|
||||||
// convert from floats to fixints 32:16
|
// convert from floats to fixints 32:16
|
||||||
fld D [fLeftOfs]
|
fld D [fLeftOfs]
|
||||||
|
@ -548,24 +430,11 @@ loopEnd:
|
||||||
emms
|
emms
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__) && (defined USE_I386_NASM_ASM)
|
||||||
// This is implemented in an external NASM file.
|
// This is implemented in an external NASM file.
|
||||||
MixMono_asm(pso);
|
MixMono_asm(pso);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error please write inline asm for your platform.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
_pfSoundProfile.StopTimer(CSoundProfile::PTI_RAWMIXER);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// mixes one stereo 16-bit signed sound to destination buffer
|
|
||||||
inline void MixStereo( CSoundObject *pso)
|
|
||||||
{
|
|
||||||
_pfSoundProfile.StartTimer(CSoundProfile::PTI_RAWMIXER);
|
|
||||||
|
|
||||||
#if (defined USE_PORTABLE_C)
|
|
||||||
// initialize some local vars
|
// initialize some local vars
|
||||||
SLONG slLeftSample, slRightSample, slNextSample;
|
SLONG slLeftSample, slRightSample, slNextSample;
|
||||||
SLONG *pslDstBuffer = (SLONG*)pvMixerBuffer;
|
SLONG *pslDstBuffer = (SLONG*)pvMixerBuffer;
|
||||||
|
@ -599,12 +468,12 @@ inline void MixStereo( CSoundObject *pso)
|
||||||
if( iCt<=0 || bEndOfSound) break;
|
if( iCt<=0 || bEndOfSound) break;
|
||||||
|
|
||||||
// fetch one lineary interpolated sample on left channel
|
// fetch one lineary interpolated sample on left channel
|
||||||
slLeftSample = pswSrcBuffer[(fixLeftOfs>>15)+0];
|
slLeftSample = pswSrcBuffer[(fixLeftOfs>>16)+0];
|
||||||
slNextSample = pswSrcBuffer[(fixLeftOfs>>15)+2];
|
slNextSample = pswSrcBuffer[(fixLeftOfs>>16)+1];
|
||||||
slLeftSample = (slLeftSample*(65535-(fixLeftOfs&65535)) + slNextSample*(fixLeftOfs&65535)) >>16;
|
slLeftSample = (slLeftSample*(65535-(fixLeftOfs&65535)) + slNextSample*(fixLeftOfs&65535)) >>16;
|
||||||
// fetch one lineary interpolated sample on right channel
|
// fetch one lineary interpolated sample on right channel
|
||||||
slRightSample = pswSrcBuffer[(fixRightOfs>>15)+0];
|
slRightSample = pswSrcBuffer[(fixRightOfs>>16)+0];
|
||||||
slNextSample = pswSrcBuffer[(fixRightOfs>>15)+2];
|
slNextSample = pswSrcBuffer[(fixRightOfs>>16)+1];
|
||||||
slRightSample = (slRightSample*(65535-(fixRightOfs&65535)) + slNextSample*(fixRightOfs&65535)) >>16;
|
slRightSample = (slRightSample*(65535-(fixRightOfs&65535)) + slNextSample*(fixRightOfs&65535)) >>16;
|
||||||
|
|
||||||
// filter samples
|
// filter samples
|
||||||
|
@ -643,7 +512,18 @@ inline void MixStereo( CSoundObject *pso)
|
||||||
iCt--;
|
iCt--;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __MSVC_INLINE__)
|
#endif
|
||||||
|
|
||||||
|
_pfSoundProfile.StopTimer(CSoundProfile::PTI_RAWMIXER);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// mixes one stereo 16-bit signed sound to destination buffer
|
||||||
|
inline void MixStereo( CSoundObject *pso)
|
||||||
|
{
|
||||||
|
_pfSoundProfile.StartTimer(CSoundProfile::PTI_RAWMIXER);
|
||||||
|
|
||||||
|
#if (defined __MSVC_INLINE__)
|
||||||
__asm {
|
__asm {
|
||||||
// convert from floats to fixints 32:16
|
// convert from floats to fixints 32:16
|
||||||
fld D [fLeftOfs]
|
fld D [fLeftOfs]
|
||||||
|
@ -778,12 +658,88 @@ loopEnd:
|
||||||
emms
|
emms
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined __GNU_INLINE__)
|
#elif (defined __GNU_INLINE_X86_32__) && (defined USE_I386_NASM_ASM)
|
||||||
// This is implemented in an external NASM file.
|
// This is implemented in an external NASM file.
|
||||||
MixStereo_asm(pso);
|
MixStereo_asm(pso);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error please write inline asm for your platform.
|
// initialize some local vars
|
||||||
|
SLONG slLeftSample, slRightSample, slNextSample;
|
||||||
|
SLONG *pslDstBuffer = (SLONG*)pvMixerBuffer;
|
||||||
|
fixLeftOfs = (__int64)(fLeftOfs * 65536.0);
|
||||||
|
fixRightOfs = (__int64)(fRightOfs * 65536.0);
|
||||||
|
__int64 fixLeftStep = (__int64)(fLeftStep * 65536.0);
|
||||||
|
__int64 fixRightStep = (__int64)(fRightStep * 65536.0);
|
||||||
|
__int64 fixSoundBufferSize = ((__int64)slSoundBufferSize)<<16;
|
||||||
|
mmSurroundFactor = (__int64)(SWORD)mmSurroundFactor;
|
||||||
|
|
||||||
|
SLONG slLeftVolume_ = slLeftVolume >> 16;
|
||||||
|
SLONG slRightVolume_ = slRightVolume >> 16;
|
||||||
|
|
||||||
|
// loop thru source buffer
|
||||||
|
INDEX iCt = slMixerBufferSize;
|
||||||
|
FOREVER
|
||||||
|
{
|
||||||
|
// if left channel source sample came to end of sample buffer
|
||||||
|
if( fixLeftOfs >= fixSoundBufferSize) {
|
||||||
|
fixLeftOfs -= fixSoundBufferSize;
|
||||||
|
// if has no loop, end it
|
||||||
|
bEndOfSound = bNotLoop;
|
||||||
|
}
|
||||||
|
// if right channel source sample came to end of sample buffer
|
||||||
|
if( fixRightOfs >= fixSoundBufferSize) {
|
||||||
|
fixRightOfs -= fixSoundBufferSize;
|
||||||
|
// if has no loop, end it
|
||||||
|
bEndOfSound = bNotLoop;
|
||||||
|
}
|
||||||
|
// end of buffer?
|
||||||
|
if( iCt<=0 || bEndOfSound) break;
|
||||||
|
|
||||||
|
// fetch one lineary interpolated sample on left channel
|
||||||
|
slLeftSample = pswSrcBuffer[(fixLeftOfs>>15)+0];
|
||||||
|
slNextSample = pswSrcBuffer[(fixLeftOfs>>15)+2];
|
||||||
|
slLeftSample = (slLeftSample*(65535-(fixLeftOfs&65535)) + slNextSample*(fixLeftOfs&65535)) >>16;
|
||||||
|
// fetch one lineary interpolated sample on right channel
|
||||||
|
slRightSample = pswSrcBuffer[(fixRightOfs>>15)+0];
|
||||||
|
slNextSample = pswSrcBuffer[(fixRightOfs>>15)+2];
|
||||||
|
slRightSample = (slRightSample*(65535-(fixRightOfs&65535)) + slNextSample*(fixRightOfs&65535)) >>16;
|
||||||
|
|
||||||
|
// filter samples
|
||||||
|
slLastLeftSample += ((slLeftSample -slLastLeftSample) *slLeftFilter) >>15;
|
||||||
|
slLastRightSample += ((slRightSample-slLastRightSample)*slRightFilter)>>15;
|
||||||
|
|
||||||
|
// apply stereo volume to current sample
|
||||||
|
slLeftSample = (slLastLeftSample * slLeftVolume_) >>15;
|
||||||
|
slRightSample = (slLastRightSample * slRightVolume_)>>15;
|
||||||
|
|
||||||
|
slLeftSample ^= (SLONG)((mmSurroundFactor>> 0)&0xFFFFFFFF);
|
||||||
|
slRightSample ^= (SLONG)((mmSurroundFactor>>32)&0xFFFFFFFF);
|
||||||
|
|
||||||
|
// mix in current sample
|
||||||
|
slLeftSample += pslDstBuffer[0];
|
||||||
|
slRightSample += pslDstBuffer[1];
|
||||||
|
// upper clamp
|
||||||
|
if( slLeftSample > MAX_SWORD) slLeftSample = MAX_SWORD;
|
||||||
|
if( slRightSample > MAX_SWORD) slRightSample = MAX_SWORD;
|
||||||
|
// lower clamp
|
||||||
|
if( slLeftSample < MIN_SWORD) slLeftSample = MIN_SWORD;
|
||||||
|
if( slRightSample < MIN_SWORD) slRightSample = MIN_SWORD;
|
||||||
|
|
||||||
|
// store samples (both channels)
|
||||||
|
pslDstBuffer[0] = slLeftSample;
|
||||||
|
pslDstBuffer[1] = slRightSample;
|
||||||
|
|
||||||
|
// modify volume
|
||||||
|
slLeftVolume += (SWORD)((mmVolumeGain>> 0)&0xFFFF);
|
||||||
|
slRightVolume += (SWORD)((mmVolumeGain>>16)&0xFFFF);
|
||||||
|
|
||||||
|
// advance to next sample
|
||||||
|
fixLeftOfs += fixLeftStep;
|
||||||
|
fixRightOfs += fixRightStep;
|
||||||
|
pslDstBuffer += 2;
|
||||||
|
iCt--;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
_pfSoundProfile.StopTimer(CSoundProfile::PTI_RAWMIXER);
|
_pfSoundProfile.StopTimer(CSoundProfile::PTI_RAWMIXER);
|
||||||
|
|
|
@ -14,10 +14,10 @@ cd $_
|
||||||
#ninja
|
#ninja
|
||||||
|
|
||||||
# This is the eventual path for amd64.
|
# This is the eventual path for amd64.
|
||||||
#cmake -DCMAKE_BUILD_TYPE=Debug -DUSE_I386_ASM=FALSE ..
|
#cmake -DCMAKE_BUILD_TYPE=Debug ..
|
||||||
|
|
||||||
# Right now we force x86, though...
|
# Right now we force x86, though...
|
||||||
cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS=-m32 -DCMAKE_CXX_FLAGS=-m32 -DUSE_I386_ASM=TRUE ..
|
cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS=-m32 -DCMAKE_CXX_FLAGS=-m32 -DUSE_I386_NASM_ASM=TRUE ..
|
||||||
|
|
||||||
make -j$NCPU
|
make -j$NCPU
|
||||||
|
|
||||||
|
|
|
@ -14,7 +14,7 @@ cd $_
|
||||||
#ninja
|
#ninja
|
||||||
|
|
||||||
# This is the eventual path for amd64.
|
# This is the eventual path for amd64.
|
||||||
cmake -DCMAKE_BUILD_TYPE=Debug -DUSE_I386_ASM=FALSE ..
|
cmake -DCMAKE_BUILD_TYPE=Debug ..
|
||||||
|
|
||||||
# Right now we force x86, though...
|
# Right now we force x86, though...
|
||||||
#cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS=-m32 -DCMAKE_CXX_FLAGS=-m32 ..
|
#cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS=-m32 -DCMAKE_CXX_FLAGS=-m32 ..
|
||||||
|
|
|
@ -9,6 +9,6 @@ set -x
|
||||||
rm -rf cmake-build
|
rm -rf cmake-build
|
||||||
mkdir $_
|
mkdir $_
|
||||||
cd $_
|
cd $_
|
||||||
cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_OSX_ARCHITECTURES=i386 -DUSE_I386_ASM=TRUE -DUSE_SYSTEM_SDL2=FALSE ..
|
cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_OSX_ARCHITECTURES=i386 -DUSE_I386_NASM_ASM=TRUE -DUSE_SYSTEM_SDL2=FALSE ..
|
||||||
make -j$NCPU
|
make -j$NCPU
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,6 @@ set -x
|
||||||
rm -rf cmake-build
|
rm -rf cmake-build
|
||||||
mkdir $_
|
mkdir $_
|
||||||
cd $_
|
cd $_
|
||||||
cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_OSX_ARCHITECTURES=x86_64 -DUSE_I386_ASM=FALSE ..
|
cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_OSX_ARCHITECTURES=x86_64 ..
|
||||||
make -j$NCPU
|
make -j$NCPU
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user