make BYTESWAP*_unsigned() macros inline functions, use GCC builtins

the builtins are only used when using GCC or clang, of course, otherwise
the usual shifting is done.

Them being inline functions instead of macros increases type safety
and gets rid of problems with signed shifts.

Changed two places in the code that swapped bytes in 32bit ints to use
BYTESWAP32_unsigned() instead - in case of PrepareTexture() this has
probably even fixed issues with signed shifts
This commit is contained in:
Daniel Gibson 2016-04-22 19:03:24 +02:00
parent a8c6c77309
commit 46353ffc5d
3 changed files with 35 additions and 6 deletions

View File

@ -679,15 +679,45 @@ inline void Clear(float i) {};
inline void Clear(double i) {};
inline void Clear(void *pv) {};
// These macros are not safe to use unless data is UNSIGNED!
#define BYTESWAP16_unsigned(x) ((((x)>>8)&0xff)+ (((x)<<8)&0xff00))
#define BYTESWAP32_unsigned(x) (((x)>>24) + (((x)>>8)&0xff00) + (((x)<<8)&0xff0000) + ((x)<<24))
// DG: screw macros, use inline functions instead - they're even safe for signed values
inline UWORD BYTESWAP16_unsigned(UWORD x)
{
#ifdef __GNUC__ // GCC and clang have a builtin that hopefully does the most efficient thing
return __builtin_bswap16(x);
#else
return (((x)>>8)&0xff)+ (((x)<<8)&0xff00);
#endif
}
inline ULONG BYTESWAP32_unsigned(ULONG x)
{
#ifdef __GNUC__ // GCC and clang have a builtin that hopefully does the most efficient thing
return __builtin_bswap32(x);
#else
return ((x)>>24) + (((x)>>8)&0xff00) + (((x)<<8)&0xff0000) + ((x)<<24);
#endif
}
inline __uint64 BYTESWAP64_unsigned(__uint64 x)
{
#ifdef __GNUC__ // GCC and clang have a builtin that hopefully does the most efficient thing
return __builtin_bswap64(x);
#else
ULONG l = BYTESWAP32_unsigned((ULONG)(val & 0xFFFFFFFF));
ULONG h = BYTESWAP32_unsigned((ULONG)((val >> 32) & 0xFFFFFFFF));
return (((__uint64)l) << 32) | ((__uint64)h);
#endif
}
// rcg03242004
#if PLATFORM_LITTLEENDIAN
#define BYTESWAP(x)
#else
// TODO: DG: the following stuff could probably be updated to use the functions above properly,
// which should make lots of cases easier. As I don't have a big endian machine I can't test,
// so I won't touch this for now.
static inline void BYTESWAP(UWORD &val)
{
#if __POWERPC__

View File

@ -109,8 +109,7 @@ pixLoop:
DWORD* dst = (DWORD*)(pubTexture+pixTextureSize);
for (int i=0; i<pixTextureSize; i++) {
const DWORD tmp = ((DWORD)*src) | 0xFFFFFF00;
*dst = ((tmp << 24) & 0xff000000 ) | ((tmp << 8) & 0x00ff0000 ) |
((tmp >> 8) & 0x0000ff00 ) | ((tmp >> 24) & 0x000000ff );
*dst = BYTESWAP32_unsigned((ULONG)tmp);
src++;
dst++;
}

View File

@ -1959,7 +1959,7 @@ __forceinline void CLayerMixer::FillShadowLayer( COLOR col)
#else
DWORD* dst = (DWORD*)lm_pulShadowMap;
int n = lm_pixCanvasSizeU*lm_pixCanvasSizeV;
DWORD color = __builtin_bswap32(col);
DWORD color = BYTESWAP32_unsigned(col);
while(n--) {*(dst++)=color;}
#endif
}