Improve some portable C implementations of math functions

* FloatToInt() should now round correctly to nearest, even for negative numbers. * Log2() now calls log2f() instead of log10()*3.321 - no idea what the previous code was about; I doubt it's faster (and the ASM code uses something like log2, too). * FastLog2() (for integers) now uses __builtin_clz() when building with GCC/clang - the resulting ASM should be pretty similar to the inline ASM below. I wonder why that function takes signed ints: log2(-1) is not a real number (yet the function returns 31). Also, both the inline ASM and my version return 0 for Log2(0), but INT_MIN would be closer to the truth. * Commented out FastMaxLog2(); it's unused. * Implemented _rotl() using a fast(er) trick from http://blog.regehr.org/archives/1063
2025-08-20 14:50:08 +02:00 · 2016-04-17 23:15:56 +02:00 · 2016-04-17 23:15:56 +02:00 · 8d26863a51
commit 8d26863a51
parent b934fa1945
2 changed files with 15 additions and 8 deletions
--- a/Sources/Engine/Base/Types.h
+++ b/Sources/Engine/Base/Types.h
@ -182,11 +182,8 @@ typedef unsigned int        UINT;
    inline ULONG _rotl(ULONG ul, int bits)
    {
        #if (defined USE_PORTABLE_C)
-            // This is not fast at all, but it works.
-            for (int i = 0; i < bits; i++)
-                ul = ( (ul << 1) | ((ul & 0x80000000) >> 31) );
-            return(ul);
-
+            // DG: according to http://blog.regehr.org/archives/1063 this is fast
+            return (ul<<bits) | (ul>>(-bits&31));
        #elif (defined __GNU_INLINE__)
            // This, on the other hand, is wicked fast.  :)
            __asm__ __volatile__ (
--- a/Sources/Engine/Math/Functions.h
+++ b/Sources/Engine/Math/Functions.h
@ -313,7 +313,10 @@ inline FLOAT NormByteToFloat( const ULONG ul)
 inline SLONG FloatToInt( FLOAT f)
 {
 #if defined(__arm__) || defined(USE_PORTABLE_C)
-  return((SLONG) (f + 0.5f));  /* best of luck to you. */
+  // round to nearest by adding/subtracting 0.5 (depending on f pos/neg) before converting to SLONG
+  float addToRound = 0.5f;
+  copysignf(addToRound, f); // copy f's signbit to addToRound => if f<0 then addToRound = -addToRound
+  return((SLONG) (f + addToRound));

 #elif (defined __MSVC_INLINE__)
  SLONG slRet;
@ -341,8 +344,7 @@ inline SLONG FloatToInt( FLOAT f)
 // log base 2 of any float numero
 inline FLOAT Log2( FLOAT f) {
 #if (defined USE_PORTABLE_C) || defined(__arm__)
-  // !!! FIXME: What's wrong with log2()?
-  return (FLOAT)(log10(f)*3.321928094887);  // log10(x)/log10(2)
+  return log2f(f);

 #elif (defined __MSVC_INLINE__)
  FLOAT fRet;
@ -376,6 +378,11 @@ inline FLOAT Log2( FLOAT f) {
 inline SLONG FastLog2( SLONG x)
 {
 #if (defined USE_PORTABLE_C)
+#ifdef __GNUC__
+  if(x == 0) return 0; // __builtin_clz() is undefined for 0
+  int numLeadingZeros  = __builtin_clz(x);
+  return 31 - numLeadingZeros;
+#else
  register SLONG val = x;
  register SLONG retval = 31;
  while (retval > 0)
@ -386,6 +393,7 @@ inline SLONG FastLog2( SLONG x)
  }

  return 0;
+#endif

 #elif (defined __MSVC_INLINE__)
  SLONG slRet;
@ -409,6 +417,7 @@ inline SLONG FastLog2( SLONG x)
 #endif
 }

+/* DG: function is unused => doesn't matter that portable implementation is not optimal :)
 // returns log2 of first larger value that is a power of 2
 inline SLONG FastMaxLog2( SLONG x)
 { 
@ -443,6 +452,7 @@ printf("CHECK THIS: %s:%d\n", __FILE__, __LINE__);
  #error Fill this in for your platform.
 #endif
 }
+*/