From 8d26863a51dd8dc0c30831b17dbd4cef1119bb9d Mon Sep 17 00:00:00 2001
From: Daniel Gibson <metalcaedes@gmail.com>
Date: Sun, 17 Apr 2016 23:15:56 +0200
Subject: [PATCH] Improve some portable C implementations of math functions

* FloatToInt() should now round correctly to nearest, even for
  negative numbers
* Log2() now calls log2f() instead of log10()*3.321 - no idea what the
  previous code was about, I doubt it's faster (and the ASM code uses
  something like log2, too).
* FastLog2() (for integers) now uses __builtin_clz() when building with
  GCC/clang - the resulting ASM should be pretty similar to the inline
  ASM below. I wonder why that function takes signed ints: log2(-1) is
  not a real number (but the function returns 31).
  Also, both the inline ASM and my version return 0 for FastLog2(0), but
  INT_MIN would be closer to the truth.
* commented out FastMaxLog2(), it's unused.
* implemented _rotl() using a fast(er) trick from
  http://blog.regehr.org/archives/1063
---
 Sources/Engine/Base/Types.h     |  7 ++-----
 Sources/Engine/Math/Functions.h | 16 +++++++++++++---
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/Sources/Engine/Base/Types.h b/Sources/Engine/Base/Types.h
index 83afcb8..056ebf3 100644
--- a/Sources/Engine/Base/Types.h
+++ b/Sources/Engine/Base/Types.h
@@ -182,11 +182,8 @@ typedef unsigned int        UINT;
     inline ULONG _rotl(ULONG ul, int bits)
     {
         #if (defined USE_PORTABLE_C)
-            // This is not fast at all, but it works.
-            for (int i = 0; i < bits; i++)
-                ul = ( (ul << 1) | ((ul & 0x80000000) >> 31) );
-            return(ul);
-
+            // DG: according to http://blog.regehr.org/archives/1063 this is fast
+            return (ul<<bits) | (ul>>(-bits&31));
         #elif (defined __GNU_INLINE__)
             // This, on the other hand, is wicked fast.  :)
             __asm__ __volatile__ (
diff --git a/Sources/Engine/Math/Functions.h b/Sources/Engine/Math/Functions.h
index 57ac5b5..d18b962 100755
--- a/Sources/Engine/Math/Functions.h
+++ b/Sources/Engine/Math/Functions.h
@@ -313,7 +313,10 @@ inline FLOAT NormByteToFloat( const ULONG ul)
 inline SLONG FloatToInt( FLOAT f)
 {
 #if defined(__arm__) || defined(USE_PORTABLE_C)
-  return((SLONG) (f + 0.5f));  /* best of luck to you. */
+  // round to nearest (halfway cases away from zero): add 0.5 carrying f's sign,
+  // then let the conversion to SLONG truncate toward zero
+  const float addToRound = copysignf(0.5f, f); // copysignf() returns the result, it does not modify its arguments
+  return((SLONG) (f + addToRound));
 
 #elif (defined __MSVC_INLINE__)
   SLONG slRet;
@@ -341,8 +344,7 @@ inline SLONG FloatToInt( FLOAT f)
 // log base 2 of any float numero
 inline FLOAT Log2( FLOAT f) {
 #if (defined USE_PORTABLE_C) || defined(__arm__)
-  // !!! FIXME: What's wrong with log2()?
-  return (FLOAT)(log10(f)*3.321928094887);  // log10(x)/log10(2)
+  return log2f(f);
 
 #elif (defined __MSVC_INLINE__)
   FLOAT fRet;
@@ -376,6 +378,11 @@ inline FLOAT Log2( FLOAT f) {
 inline SLONG FastLog2( SLONG x)
 {
 #if (defined USE_PORTABLE_C)
+#ifdef __GNUC__
+  if(x == 0) return 0; // __builtin_clz() is undefined for 0
+  int numLeadingZeros  = __builtin_clz(x);
+  return 31 - numLeadingZeros;
+#else
   register SLONG val = x;
   register SLONG retval = 31;
   while (retval > 0)
@@ -386,6 +393,7 @@ inline SLONG FastLog2( SLONG x)
   }
 
   return 0;
+#endif
 
 #elif (defined __MSVC_INLINE__)
   SLONG slRet;
@@ -409,6 +417,7 @@ inline SLONG FastLog2( SLONG x)
 #endif
 }
 
+/* DG: function is unused => doesn't matter that portable implementation is not optimal :)
 // returns log2 of first larger value that is a power of 2
 inline SLONG FastMaxLog2( SLONG x)
 { 
@@ -443,6 +452,7 @@ printf("CHECK THIS: %s:%d\n", __FILE__, __LINE__);
   #error Fill this in for your platform.
 #endif
 }
+*/