From c58328d8817949162abf5887acb98a4f5c60a462 Mon Sep 17 00:00:00 2001 From: Daniel Gibson Date: Fri, 22 Apr 2016 19:40:09 +0200 Subject: [PATCH] RSBinToGroups(): use __builtin_clz() instead of manual bsr if available. Should be faster. --- .../Engine/Graphics/DrawPort_RenderScene.cpp | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/Sources/Engine/Graphics/DrawPort_RenderScene.cpp b/Sources/Engine/Graphics/DrawPort_RenderScene.cpp index d624442..579c53e 100644 --- a/Sources/Engine/Graphics/DrawPort_RenderScene.cpp +++ b/Sources/Engine/Graphics/DrawPort_RenderScene.cpp @@ -499,18 +499,27 @@ static void RSBinToGroups( ScenePolygon *pspoFirst) ); #else - // emulate x86's bsr opcode...not fast. :/ - register DWORD val = _ctGroupsCount; - register INDEX bsr = 31; - if (val != 0) - { - while (bsr > 0) - { - if (val & (1l << bsr)) - break; - bsr--; - } - } + // emulate x86's bsr opcode... + + // GCC and clang have an architecture-independent intrinsic for this + // (it counts leading zeros starting at MSB and is undefined for 0) + #ifdef __GNUC__ + INDEX bsr = 31; + if(_ctGroupsCount != 0) bsr -= __builtin_clz(_ctGroupsCount); + else bsr = 0; + #else // another compiler - doing it manually.. not fast. :/ + register DWORD val = _ctGroupsCount; + register INDEX bsr = 31; + if (val != 0) + { + while (bsr > 0) + { + if (val & (1l << bsr)) + break; + bsr--; + } + } + #endif _ctGroupsCount = 2 << bsr; #endif