From 1f70d4e242d96cac742e06a048774b70f9b06657 Mon Sep 17 00:00:00 2001
From: notaz <notasas@gmail.com>
Date: Sun, 24 Apr 2016 20:16:04 +0300
Subject: [PATCH] rework asm to always fall back to portable C code

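With this change the portable C implementations become the #else fallback
of the inline-asm blocks, so other platforms no longer need to define
USE_PORTABLE_C to avoid the x86 asm paths.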
---
 Sources/Engine/Base/Base.h                    |   4 +-
 Sources/Engine/Base/Profiling.cpp             |  26 +-
 Sources/Engine/Base/Timer.cpp                 |   2 +-
 Sources/Engine/Base/Types.h                   |   8 +-
 Sources/Engine/Engine.cpp                     |  19 +-
 Sources/Engine/Graphics/Color.cpp             | 107 ++-
 Sources/Engine/Graphics/Color.h               |  83 +-
 .../Engine/Graphics/DrawPort_RenderScene.cpp  |  43 +-
 Sources/Engine/Graphics/Fog.cpp               |  29 +-
 .../Engine/Graphics/Gfx_OpenGL_Textures.cpp   |  52 +-
 Sources/Engine/Graphics/Graphics.cpp          | 799 +++++++++---------
 Sources/Engine/Graphics/OpenGL.h              |  14 +-
 Sources/Engine/Graphics/TextureEffects.cpp    |  13 +-
 Sources/Engine/Light/LayerMixer.cpp           | 175 ++--
 Sources/Engine/Math/Float.cpp                 |  31 +-
 Sources/Engine/Math/Functions.h               |  65 +-
 Sources/Engine/Models/RenderModel_View.cpp    |  32 +-
 Sources/Engine/Rendering/RendMisc.cpp         |   8 +-
 Sources/Engine/Sound/SoundMixer.cpp           | 298 +++----
 19 files changed, 790 insertions(+), 1018 deletions(-)
diff --git a/Sources/Engine/Base/Base.h b/Sources/Engine/Base/Base.h
index fd032f3..12151f1 100644
--- a/Sources/Engine/Base/Base.h
+++ b/Sources/Engine/Base/Base.h
@@ -65,9 +65,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 #else
   #warning "UNKNOWN PLATFORM IDENTIFIED!!!!"
   #define PLATFORM_UNKNOWN 1
-  #warning "USING PORTABLE C!!!"
-  #define USE_PORTABLE_C
-#endif 
+#endif
 
 #if PLATFORM_LINUX || PLATFORM_MACOSX
   #ifndef PLATFORM_UNIX
diff --git a/Sources/Engine/Base/Profiling.cpp b/Sources/Engine/Base/Profiling.cpp
index 744c740..87bd3b7 100644
--- a/Sources/Engine/Base/Profiling.cpp
+++ b/Sources/Engine/Base/Profiling.cpp
@@ -21,24 +21,13 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 template class CStaticArray<CProfileCounter>;
 template class CStaticArray<CProfileTimer>;
 
-#if (defined USE_PORTABLE_C)
+#if (defined PLATFORM_UNIX) && !defined(__GNU_INLINE_X86_32__)
 #include <sys/time.h>
 #endif
 
 static inline __int64 ReadTSC_profile(void)
 {
-#if (defined USE_PORTABLE_C)
-  #ifdef __arm__
-  struct timespec tv;
-  clock_gettime(CLOCK_MONOTONIC, &tv);
-  return( (((__int64) tv.tv_sec) * 1000) + (((__int64) tv.tv_nsec) / 1000000) );
-  #else
-  struct timeval tv;
-  gettimeofday(&tv, NULL);
-  return( (((__int64) tv.tv_sec) * 1000) + (((__int64) tv.tv_usec) / 1000) );
-  #endif
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __int64 mmRet;
   __asm {
     rdtsc
@@ -60,7 +49,16 @@ static inline __int64 ReadTSC_profile(void)
   return(mmRet);
 
 #else
-  #error Please implement for your platform/compiler.
+  #ifdef __arm__
+  struct timespec tv;
+  clock_gettime(CLOCK_MONOTONIC, &tv);
+  return( (((__int64) tv.tv_sec) * 1000) + (((__int64) tv.tv_nsec) / 1000000) );
+  #else
+  struct timeval tv;
+  gettimeofday(&tv, NULL);
+  return( (((__int64) tv.tv_sec) * 1000) + (((__int64) tv.tv_usec) / 1000) );
+  #endif
+
 #endif
 }
 
diff --git a/Sources/Engine/Base/Timer.cpp b/Sources/Engine/Base/Timer.cpp
index 5c77caa..8c02846 100755
--- a/Sources/Engine/Base/Timer.cpp
+++ b/Sources/Engine/Base/Timer.cpp
@@ -29,7 +29,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 #include <Engine/Base/Priority.inl>
 
 // !!! FIXME: use SDL timer code instead and rdtsc never?
-#if (USE_PORTABLE_C) 
+#if (defined PLATFORM_UNIX) && !defined(__GNU_INLINE_X86_32__)
 #define USE_GETTIMEOFDAY 1
 #endif
 
diff --git a/Sources/Engine/Base/Types.h b/Sources/Engine/Base/Types.h
index bfa030f..fe1672a 100644
--- a/Sources/Engine/Base/Types.h
+++ b/Sources/Engine/Base/Types.h
@@ -229,10 +229,7 @@ MY_STATIC_ASSERT(size_tSize, sizeof(size_t) == sizeof(void*));
 
     inline ULONG _rotl(ULONG ul, int bits)
     {
-        #if (defined USE_PORTABLE_C)
-            // DG: according to http://blog.regehr.org/archives/1063 this is fast
-            return (ul<<bits) | (ul>>(-bits&31));
-        #elif (defined __GNU_INLINE_X86_32__)
+        #if (defined __GNU_INLINE_X86_32__)
             // This, on the other hand, is wicked fast.  :)
             __asm__ __volatile__ (
                 "roll %%cl, %%eax    \n\t"
@@ -254,7 +251,8 @@ MY_STATIC_ASSERT(size_tSize, sizeof(size_t) == sizeof(void*));
             return(ul);
 
         #else
-            #error need inline asm for your platform.
+            // DG: according to http://blog.regehr.org/archives/1063 this is fast
+            return (ul<<bits) | (ul>>(-bits&31));
         #endif
     }
 
diff --git a/Sources/Engine/Engine.cpp b/Sources/Engine/Engine.cpp
index 1ce48b1..4ca653e 100644
--- a/Sources/Engine/Engine.cpp
+++ b/Sources/Engine/Engine.cpp
@@ -125,14 +125,10 @@ BOOL APIENTRY DllMain( HANDLE hModule, DWORD  ul_reason_for_call, LPVOID lpReser
 
 static void DetectCPU(void)
 {
-#if (defined USE_PORTABLE_C)  // rcg10072001
-  CPrintF(TRANSV("  (No CPU detection in this binary.)\n"));
-
-#else
-  char strVendor[12+1];
+  char strVendor[12+1] = { 0 };
   strVendor[12] = 0;
-  ULONG ulTFMS;
-  ULONG ulFeatures;
+  ULONG ulTFMS = 0;
+  ULONG ulFeatures = 0;
 
   #if (defined __MSVC_INLINE__)
   // test MMX presence and update flag
@@ -181,10 +177,13 @@ static void DetectCPU(void)
             : "eax", "ecx", "edx", "memory"
     );
 
-  #else
-    #error Please implement for your platform or define USE_PORTABLE_C.
   #endif
 
+  if (ulTFMS == 0) {
+    CPrintF(TRANSV("  (No CPU detection in this binary.)\n"));
+    return;
+  }
+
   INDEX iType     = (ulTFMS>>12)&0x3;
   INDEX iFamily   = (ulTFMS>> 8)&0xF;
   INDEX iModel    = (ulTFMS>> 4)&0xF;
@@ -215,8 +214,6 @@ static void DetectCPU(void)
   sys_iCPUMHz = INDEX(_pTimer->tm_llCPUSpeedHZ/1E6);
 
   if( !bMMX) FatalError( TRANS("MMX support required but not present!"));
-
-#endif  // defined USE_PORTABLE_C
 }
 
 static void DetectCPUWrapper(void)
diff --git a/Sources/Engine/Graphics/Color.cpp b/Sources/Engine/Graphics/Color.cpp
index 1d88614..6a6519c 100644
--- a/Sources/Engine/Graphics/Color.cpp
+++ b/Sources/Engine/Graphics/Color.cpp
@@ -247,30 +247,7 @@ COLOR MulColors( COLOR col1, COLOR col2)
   if( col2==0xFFFFFFFF)   return col1;
   if( col1==0 || col2==0) return 0;
 
-#if (defined USE_PORTABLE_C)
-  // !!! FIXME: This...is not fast.
-  union
-  {
-    COLOR col;
-    UBYTE bytes[4];
-  } conv1;
-
-  union
-  {
-    COLOR col;
-    UBYTE bytes[4];
-  } conv2;
-
-  conv1.col = col1;
-  conv2.col = col2;
-  conv1.bytes[0] = (UBYTE) ((((DWORD) conv1.bytes[0]) * ((DWORD) conv2.bytes[0])) / 255);
-  conv1.bytes[1] = (UBYTE) ((((DWORD) conv1.bytes[1]) * ((DWORD) conv2.bytes[1])) / 255);
-  conv1.bytes[2] = (UBYTE) ((((DWORD) conv1.bytes[2]) * ((DWORD) conv2.bytes[2])) / 255);
-  conv1.bytes[3] = (UBYTE) ((((DWORD) conv1.bytes[3]) * ((DWORD) conv2.bytes[3])) / 255);
-
-  return(conv1.col);
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   COLOR colRet;
   __asm {
     xor     ebx,ebx
@@ -433,20 +410,6 @@ COLOR MulColors( COLOR col1, COLOR col2)
 
   return colRet;
 #else
-  #error please fill in inline assembly for your platform.
-#endif
-}
-
-
-// fast color additon function - RES = clamp (1ST + 2ND)
-COLOR AddColors( COLOR col1, COLOR col2) 
-{
-  if( col1==0) return col2;
-  if( col2==0) return col1;
-  if( col1==0xFFFFFFFF || col2==0xFFFFFFFF) return 0xFFFFFFFF;
-  COLOR colRet;
-
-#if (defined USE_PORTABLE_C)
   // !!! FIXME: This...is not fast.
   union
   {
@@ -459,19 +422,28 @@ COLOR AddColors( COLOR col1, COLOR col2)
     COLOR col;
     UBYTE bytes[4];
   } conv2;
-  #define MINVAL(a, b) ((a)>(b))?(b):(a)
 
   conv1.col = col1;
   conv2.col = col2;
-  conv1.bytes[0] = (UBYTE) MINVAL((((WORD) conv1.bytes[0]) + ((WORD) conv2.bytes[0])) , 255);
-  conv1.bytes[1] = (UBYTE) MINVAL((((WORD) conv1.bytes[1]) + ((WORD) conv2.bytes[1])) , 255);
-  conv1.bytes[2] = (UBYTE) MINVAL((((WORD) conv1.bytes[2]) + ((WORD) conv2.bytes[2])) , 255);
-  conv1.bytes[3] = (UBYTE) MINVAL((((WORD) conv1.bytes[3]) + ((WORD) conv2.bytes[3])) , 255);
-  #undef MINVAL
+  conv1.bytes[0] = (UBYTE) ((((DWORD) conv1.bytes[0]) * ((DWORD) conv2.bytes[0])) / 255);
+  conv1.bytes[1] = (UBYTE) ((((DWORD) conv1.bytes[1]) * ((DWORD) conv2.bytes[1])) / 255);
+  conv1.bytes[2] = (UBYTE) ((((DWORD) conv1.bytes[2]) * ((DWORD) conv2.bytes[2])) / 255);
+  conv1.bytes[3] = (UBYTE) ((((DWORD) conv1.bytes[3]) * ((DWORD) conv2.bytes[3])) / 255);
 
-  colRet = conv1.col;
+  return(conv1.col);
+#endif
+}
 
-#elif (defined __MSVC_INLINE__)
+
+// fast color addition function - RES = clamp (1ST + 2ND)
+COLOR AddColors( COLOR col1, COLOR col2) 
+{
+  if( col1==0) return col2;
+  if( col2==0) return col1;
+  if( col1==0xFFFFFFFF || col2==0xFFFFFFFF) return 0xFFFFFFFF;
+  COLOR colRet;
+
+#if (defined __MSVC_INLINE__)
   __asm {
     xor     ebx,ebx
     mov     esi,255
@@ -608,7 +580,29 @@ COLOR AddColors( COLOR col1, COLOR col2)
   );
 
 #else
-  #error please fill in inline assembly for your platform.
+  // !!! FIXME: This...is not fast.
+  union
+  {
+    COLOR col;
+    UBYTE bytes[4];
+  } conv1;
+
+  union
+  {
+    COLOR col;
+    UBYTE bytes[4];
+  } conv2;
+  #define MINVAL(a, b) ((a)>(b))?(b):(a)
+
+  conv1.col = col1;
+  conv2.col = col2;
+  conv1.bytes[0] = (UBYTE) MINVAL((((WORD) conv1.bytes[0]) + ((WORD) conv2.bytes[0])) , 255);
+  conv1.bytes[1] = (UBYTE) MINVAL((((WORD) conv1.bytes[1]) + ((WORD) conv2.bytes[1])) , 255);
+  conv1.bytes[2] = (UBYTE) MINVAL((((WORD) conv1.bytes[2]) + ((WORD) conv2.bytes[2])) , 255);
+  conv1.bytes[3] = (UBYTE) MINVAL((((WORD) conv1.bytes[3]) + ((WORD) conv2.bytes[3])) , 255);
+  #undef MINVAL
+
+  colRet = conv1.col;
 #endif
 
   return colRet;
@@ -619,14 +613,7 @@ COLOR AddColors( COLOR col1, COLOR col2)
 // multiple conversion from OpenGL color to DirectX color
 extern void abgr2argb( ULONG *pulSrc, ULONG *pulDst, INDEX ct)
 {
-#if (defined USE_PORTABLE_C)
-  //#error write me.
-  for (int i=0; i<ct; i++) {
-    ULONG tmp = pulSrc[i];
-    pulDst[i] = (tmp&0xff00ff00) | ((tmp&0x00ff0000)>>16) | ((tmp&0x000000ff)<<16);
-  }
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     mov   esi,dword ptr [pulSrc]
     mov   edi,dword ptr [pulDst]
@@ -678,12 +665,12 @@ colSkip2:
     mov   dword ptr [edi],eax
 colSkip1:
   }
-
-#elif (defined __GNU_INLINE_X86_32__)
-  STUBBED("convert to inline asm.");
-
 #else
-  #error please fill in inline assembly for your platform.
+  for (int i=0; i<ct; i++) {
+    ULONG tmp = pulSrc[i];
+    pulDst[i] = (tmp&0xff00ff00) | ((tmp&0x00ff0000)>>16) | ((tmp&0x000000ff)<<16);
+  }
+
 #endif
 }
 
diff --git a/Sources/Engine/Graphics/Color.h b/Sources/Engine/Graphics/Color.h
index a7f048a..de0c666 100644
--- a/Sources/Engine/Graphics/Color.h
+++ b/Sources/Engine/Graphics/Color.h
@@ -204,19 +204,7 @@ ENGINE_API extern COLOR AddColors( COLOR col1, COLOR col2); // fast color addito
 __forceinline ULONG ByteSwap( ULONG ul)
 {
 /* rcg10052001 Platform-wrappers. */
-#if (defined USE_PORTABLE_C)
-	ul = ( ((ul << 24)            ) |
-           ((ul << 8) & 0x00FF0000) |
-           ((ul >> 8) & 0x0000FF00) |
-           ((ul >> 24)            ) );
-
-    #if (defined PLATFORM_BIGENDIAN)
-    BYTESWAP(ul);  // !!! FIXME: May not be right!
-    #endif
-
-    return(ul);
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   ULONG ulRet;
   __asm {
     mov   eax,dword ptr [ul]
@@ -234,16 +222,22 @@ __forceinline ULONG ByteSwap( ULONG ul)
   return(ul);
 
 #else
-  #error please define for your platform.
+  ul = ( ((ul << 24)            ) |
+         ((ul << 8) & 0x00FF0000) |
+         ((ul >> 8) & 0x0000FF00) |
+         ((ul >> 24)            ) );
+
+  #if (defined PLATFORM_BIGENDIAN)
+  BYTESWAP(ul);  // !!! FIXME: May not be right!
+  #endif
+
+  return(ul);
 #endif
 }
 
 __forceinline ULONG rgba2argb( ULONG ul)
 {
-#if (defined USE_PORTABLE_C)
-	return( (ul << 24) | (ul >> 8) );
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   ULONG ulRet;
   __asm {
     mov   eax,dword ptr [ul]
@@ -263,21 +257,14 @@ __forceinline ULONG rgba2argb( ULONG ul)
   return ulRet;
 
 #else
-  #error please define for your platform.
+  return (ul << 24) | (ul >> 8);
+
 #endif
 }
 
 __forceinline ULONG abgr2argb( COLOR col)
 {
-#if (defined USE_PORTABLE_C)
-	// this could be simplified, this is just a safe conversion from asm code
-	col = ( ((col << 24)            ) |
-            ((col << 8) & 0x00FF0000) |
-            ((col >> 8) & 0x0000FF00) |
-            ((col >> 24)            ) );
-	return( (col << 24) | (col >> 8) );
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   ULONG ulRet;
   __asm {
     mov   eax,dword ptr [col]
@@ -299,7 +286,13 @@ __forceinline ULONG abgr2argb( COLOR col)
   return ulRet;
 
 #else
-  #error please define for your platform.
+  // this could be simplified, this is just a safe conversion from asm code
+  col = ( ((col << 24)            ) |
+          ((col << 8) & 0x00FF0000) |
+          ((col >> 8) & 0x0000FF00) |
+          ((col >> 24)            ) );
+  return( (col << 24) | (col >> 8) );
+
 #endif
 }
 
@@ -311,10 +304,7 @@ extern void abgr2argb( ULONG *pulSrc, ULONG *pulDst, INDEX ct);
 // fast memory copy of ULONGs
 inline void CopyLongs( ULONG *pulSrc, ULONG *pulDst, INDEX ctLongs)
 {
-#if ((defined USE_PORTABLE_C) || (PLATFORM_MACOSX))
-  memcpy( pulDst, pulSrc, ctLongs*4);
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     cld
     mov   esi,dword ptr [pulSrc]
@@ -322,23 +312,8 @@ inline void CopyLongs( ULONG *pulSrc, ULONG *pulDst, INDEX ctLongs)
     mov   ecx,dword ptr [ctLongs]
     rep   movsd
   }
-
-#elif (defined __GNU_INLINE_X86_32__)
-    // I haven't benchmarked it, but in many cases, memcpy() becomes an
-    //  inline (asm?) macro on GNU platforms, so this might not be a
-    //  speed gain at all over the USE_PORTABLE_C version.
-    // You Have Been Warned. --ryan.
-  __asm__ __volatile__ (
-    "cld    \n\t"
-    "rep    \n\t"
-    "movsd  \n\t"
-        : "=S" (pulSrc), "=D" (pulDst), "=c" (ctLongs)
-        : "S" (pulSrc), "D" (pulDst), "c" (ctLongs)
-        : "cc", "memory"
-  );
-
 #else
-# error Please fill this in for your platform.
+  memcpy( pulDst, pulSrc, ctLongs*4);
 #endif
 }
 
@@ -346,11 +321,7 @@ inline void CopyLongs( ULONG *pulSrc, ULONG *pulDst, INDEX ctLongs)
 // fast memory set of ULONGs
 inline void StoreLongs( ULONG ulVal, ULONG *pulDst, INDEX ctLongs)
 {
-#if (defined USE_PORTABLE_C)
-  for( INDEX i=0; i<ctLongs; i++)
-    pulDst[i] = ulVal;
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     cld
     mov   eax,dword ptr [ulVal]
@@ -370,7 +341,9 @@ inline void StoreLongs( ULONG ulVal, ULONG *pulDst, INDEX ctLongs)
   );
 
 #else
-# error Please fill this in for your platform.
+  for( INDEX i=0; i<ctLongs; i++)
+    pulDst[i] = ulVal;
+
 #endif
 }
 
diff --git a/Sources/Engine/Graphics/DrawPort_RenderScene.cpp b/Sources/Engine/Graphics/DrawPort_RenderScene.cpp
index b471251..d624442 100644
--- a/Sources/Engine/Graphics/DrawPort_RenderScene.cpp
+++ b/Sources/Engine/Graphics/DrawPort_RenderScene.cpp
@@ -38,16 +38,6 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 #define W  word ptr
 #define B  byte ptr
 
-#if (defined USE_PORTABLE_C)
-#define ASMOPT 0
-#elif (defined __MSVC_INLINE__)
-#define ASMOPT 1
-#elif (defined __GNU_INLINE_X86_32__)
-#define ASMOPT 1
-#else
-#define ASMOPT 0
-#endif
-
 #define MAXTEXUNITS   4
 #define SHADOWTEXTURE 3
 
@@ -153,8 +143,7 @@ void AddElements( ScenePolygon *pspo)
   const INDEX ctElems = pspo->spo_ctElements;
   INDEX *piDst = _aiElements.Push(ctElems);
 
-#if (ASMOPT == 1)
- #if (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     mov     eax,D [pspo]
     mov     ecx,D [ctElems]
@@ -184,7 +173,7 @@ elemRest:
     mov     D [edi],eax
 elemDone:
   }
- #elif (defined __GNU_INLINE_X86_32__)
+#elif (defined __GNU_INLINE_X86_32__)
   __asm__ __volatile__ (
     "movl    %[ctElems], %%ecx      \n\t"
     "movl    %[piDst], %%edi        \n\t"
@@ -219,11 +208,6 @@ elemDone:
           "cc", "memory"
   );
 
- #else
-   #error Please write inline ASM for your platform.
-
- #endif
-
 #else
   const INDEX iVtx0Pass = pspo->spo_iVtx0Pass;
   const INDEX *piSrc = pspo->spo_piElements;
@@ -495,9 +479,7 @@ static void RSBinToGroups( ScenePolygon *pspoFirst)
   // determine maximum used groups
   ASSERT( _ctGroupsCount);
 
-#if ASMOPT == 1
-
- #if (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     mov     eax,2
     bsr     ecx,D [_ctGroupsCount]
@@ -505,7 +487,7 @@ static void RSBinToGroups( ScenePolygon *pspoFirst)
     mov     D [_ctGroupsCount],eax
   }
 
- #elif (defined __GNU_INLINE_X86_32__)
+#elif (defined __GNU_INLINE_X86_32__)
   __asm__ __volatile__ (
     "movl     $2, %%eax          \n\t"
     "bsrl     (%%esi), %%ecx     \n\t"
@@ -516,11 +498,6 @@ static void RSBinToGroups( ScenePolygon *pspoFirst)
         : "eax", "ecx", "cc", "memory"
   );
 
- #else
-   #error Please write inline ASM for your platform.
-
- #endif
-
 #else
   // emulate x86's bsr opcode...not fast.  :/
   register DWORD val = _ctGroupsCount;
@@ -858,10 +835,7 @@ static void RSSetTextureCoords( ScenePolygon *pspoGroup, INDEX iLayer, INDEX iUn
       continue;
     }
 
-// !!! FIXME: rcg11232001 This inline conversion is broken. Use the
-// !!! FIXME: rcg11232001  C version for now with GCC.
-#if ((ASMOPT == 1) && (!defined __GNU_INLINE_X86_32__) && (!defined __INTEL_COMPILER))
-  #if (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
     __asm {
       mov     esi,D [pspo]
       mov     edi,D [iMappingOffset]
@@ -915,7 +889,7 @@ vtxLoop:
 /*
     // !!! FIXME: rcg11232001 This inline conversion is broken. Use the
     // !!! FIXME: rcg11232001  C version for now on Linux.
- #elif (defined __GNU_INLINE_X86_32__)
+#elif (defined __GNU_INLINE_X86_32__)
     STUBBED("debug this");
     __asm__ __volatile__ (
       "0:                                  \n\t" // vtxLoop
@@ -956,11 +930,6 @@ vtxLoop:
     );
 */
 
- #else
-   #error Please write inline ASM for your platform.
-
- #endif
-
 #else
 
     // diffuse mapping
diff --git a/Sources/Engine/Graphics/Fog.cpp b/Sources/Engine/Graphics/Fog.cpp
index bf5fb30..2945cb4 100644
--- a/Sources/Engine/Graphics/Fog.cpp
+++ b/Sources/Engine/Graphics/Fog.cpp
@@ -67,18 +67,7 @@ ULONG PrepareTexture( UBYTE *pubTexture, PIX pixSizeI, PIX pixSizeJ)
   // need to upload from RGBA format
   const PIX pixTextureSize = pixSizeI*pixSizeJ;
 
- #if (defined USE_PORTABLE_C)
-   const UBYTE* src = pubTexture;
-   DWORD* dst = (DWORD*)(pubTexture+pixTextureSize);
-   for (int i=0; i<pixTextureSize; i++) {
-    const DWORD tmp = ((DWORD)*src) | 0xFFFFFF00;
-    *dst = ((tmp << 24) & 0xff000000 ) | ((tmp <<  8) & 0x00ff0000 ) |
-      ((tmp >>  8) & 0x0000ff00 ) | ((tmp >> 24) & 0x000000ff );
-    src++;
-    dst++;
-   }
-
- #elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     mov     esi,D [pubTexture]
     mov     edi,D [pubTexture]
@@ -95,7 +84,7 @@ pixLoop:
     jnz     pixLoop
   }
 
- #elif (defined __GNU_INLINE_X86_32__)
+#elif (defined __GNU_INLINE_X86_32__)
   __asm__ __volatile__ (
     "movl    %[pubTexture], %%esi      \n\t"
     "movl    %[pixTextureSize], %%ecx  \n\t"
@@ -115,10 +104,18 @@ pixLoop:
         : "eax", "ecx", "esi", "edi", "cc", "memory"
   );
 
- #else
-   #error Write inline ASM for your platform.
+#else
+   const UBYTE* src = pubTexture;
+   DWORD* dst = (DWORD*)(pubTexture+pixTextureSize);
+   for (int i=0; i<pixTextureSize; i++) {
+    const DWORD tmp = ((DWORD)*src) | 0xFFFFFF00;
+    *dst = ((tmp << 24) & 0xff000000 ) | ((tmp <<  8) & 0x00ff0000 ) |
+      ((tmp >>  8) & 0x0000ff00 ) | ((tmp >> 24) & 0x000000ff );
+    src++;
+    dst++;
+   }
 
- #endif
+#endif
 
   // determine internal format
   extern INDEX gap_bAllowGrayTextures;
diff --git a/Sources/Engine/Graphics/Gfx_OpenGL_Textures.cpp b/Sources/Engine/Graphics/Gfx_OpenGL_Textures.cpp
index e694f7b..4ad76ab 100644
--- a/Sources/Engine/Graphics/Gfx_OpenGL_Textures.cpp
+++ b/Sources/Engine/Graphics/Gfx_OpenGL_Textures.cpp
@@ -169,32 +169,7 @@ void UploadTexture_OGL( ULONG *pulTexture, PIX pixSizeU, PIX pixSizeV,
       if( pixSizeV==0) pixSizeV=1;
       pixSize = pixSizeU*pixSizeV;
 
-      #if (defined USE_PORTABLE_C)
-      // Basically average every other pixel...
-      UWORD w = 0;
-      UBYTE *dptr = (UBYTE *) pulDst;
-      UBYTE *sptr = (UBYTE *) pulSrc;
-      #if 0
-      pixSize *= 4;
-      for (PIX i = 0; i < pixSize; i++)
-      {
-        *dptr = (UBYTE) ( (((UWORD) sptr[0]) + ((UWORD) sptr[1])) >> 1 );
-        dptr++;
-        sptr += 2;
-      }
-      #else
-      for (PIX i = 0; i < pixSize; i++)
-      {
-        for (PIX j = 0; j < 4; j++)
-        {
-          *dptr = (UBYTE) ( (((UWORD) sptr[0]) + ((UWORD) sptr[4])) >> 1 );
-          dptr++;
-          sptr++;
-        }
-        sptr += 4;
-      }
-      #endif
-      #elif (defined __MSVC_INLINE__)
+      #if (defined __MSVC_INLINE__)
       __asm {   
         pxor    mm0,mm0
         mov     esi,D [pulSrc]
@@ -244,7 +219,30 @@ void UploadTexture_OGL( ULONG *pulTexture, PIX pixSizeU, PIX pixSizeV,
       );
 
       #else
-      #error Please write inline ASM for your platform.
+      // Basically average every other pixel...
+      UWORD w = 0;
+      UBYTE *dptr = (UBYTE *) pulDst;
+      UBYTE *sptr = (UBYTE *) pulSrc;
+      #if 0
+      pixSize *= 4;
+      for (PIX i = 0; i < pixSize; i++)
+      {
+        *dptr = (UBYTE) ( (((UWORD) sptr[0]) + ((UWORD) sptr[1])) >> 1 );
+        dptr++;
+        sptr += 2;
+      }
+      #else
+      for (PIX i = 0; i < pixSize; i++)
+      {
+        for (PIX j = 0; j < 4; j++)
+        {
+          *dptr = (UBYTE) ( (((UWORD) sptr[0]) + ((UWORD) sptr[4])) >> 1 );
+          dptr++;
+          sptr++;
+        }
+        sptr += 4;
+      }
+      #endif
       #endif
 
       // upload mipmap
diff --git a/Sources/Engine/Graphics/Graphics.cpp b/Sources/Engine/Graphics/Graphics.cpp
index 4693d8c..3d4be59 100644
--- a/Sources/Engine/Graphics/Graphics.cpp
+++ b/Sources/Engine/Graphics/Graphics.cpp
@@ -209,58 +209,7 @@ static void MakeOneMipmap( ULONG *pulSrcMipmap, ULONG *pulDstMipmap, PIX pixWidt
   if( bBilinear) // type of filtering?
   { // BILINEAR
 
-   #if (defined USE_PORTABLE_C)
-	UBYTE *src = (UBYTE *) pulSrcMipmap;
-	UBYTE *dest = (UBYTE *) pulDstMipmap;
-	for (int i = 0 ; i < pixHeight; i++)
-	{
-		for (int j = 0; j < pixWidth; j++)
-		{
-			// Grab pixels from image
-			UWORD upleft[4];
-			UWORD upright[4];
-			UWORD downleft[4];
-			UWORD downright[4];
-			upleft[0] = *(src + 0);
-			upleft[1] = *(src + 1);
-			upleft[2] = *(src + 2);
-			upleft[3] = *(src + 3);
-			upright[0] = *(src + 4);
-			upright[1] = *(src + 5);
-			upright[2] = *(src + 6);
-			upright[3] = *(src + 7);
-
-			downleft[0] = *(src + pixWidth*8 + 0);
-			downleft[1] = *(src + pixWidth*8 + 1);
-			downleft[2] = *(src + pixWidth*8 + 2);
-			downleft[3] = *(src + pixWidth*8 + 3);
-			downright[0] = *(src + pixWidth*8 + 4);
-			downright[1] = *(src + pixWidth*8 + 5);
-			downright[2] = *(src + pixWidth*8 + 6);
-			downright[3] = *(src + pixWidth*8 + 7);
-
-			UWORD answer[4];
-			answer[0] = upleft[0] + upright[0] + downleft[0] + downright[0] + 2;
-			answer[1] = upleft[1] + upright[1] + downleft[1] + downright[1] + 2;
-			answer[2] = upleft[2] + upright[2] + downleft[2] + downright[2] + 2;
-			answer[3] = upleft[3] + upright[3] + downleft[3] + downright[3] + 2;
-			answer[0] /= 4;
-			answer[1] /= 4;
-			answer[2] /= 4;
-			answer[3] /= 4;
-
-			*(dest + 0) = answer[0];
-			*(dest + 1) = answer[1];
-			*(dest + 2) = answer[2];
-			*(dest + 3) = answer[3];
-
-			src += 8;
-			dest += 4;
-		}
-		src += 8*pixWidth;
-    }
-
-   #elif (defined __MSVC_INLINE__)
+   #if (defined __MSVC_INLINE__)
     __asm {
       pxor    mm0,mm0
       mov     ebx,D [pixWidth]
@@ -346,43 +295,63 @@ pixLoopN:
     );
 
    #else
-     #error Write inline asm for your platform.
+	UBYTE *src = (UBYTE *) pulSrcMipmap;
+	UBYTE *dest = (UBYTE *) pulDstMipmap;
+	for (int i = 0 ; i < pixHeight; i++)
+	{
+		for (int j = 0; j < pixWidth; j++)
+		{
+			// Grab pixels from image
+			UWORD upleft[4];
+			UWORD upright[4];
+			UWORD downleft[4];
+			UWORD downright[4];
+			upleft[0] = *(src + 0);
+			upleft[1] = *(src + 1);
+			upleft[2] = *(src + 2);
+			upleft[3] = *(src + 3);
+			upright[0] = *(src + 4);
+			upright[1] = *(src + 5);
+			upright[2] = *(src + 6);
+			upright[3] = *(src + 7);
+
+			downleft[0] = *(src + pixWidth*8 + 0);
+			downleft[1] = *(src + pixWidth*8 + 1);
+			downleft[2] = *(src + pixWidth*8 + 2);
+			downleft[3] = *(src + pixWidth*8 + 3);
+			downright[0] = *(src + pixWidth*8 + 4);
+			downright[1] = *(src + pixWidth*8 + 5);
+			downright[2] = *(src + pixWidth*8 + 6);
+			downright[3] = *(src + pixWidth*8 + 7);
+
+			UWORD answer[4];
+			answer[0] = upleft[0] + upright[0] + downleft[0] + downright[0] + 2;
+			answer[1] = upleft[1] + upright[1] + downleft[1] + downright[1] + 2;
+			answer[2] = upleft[2] + upright[2] + downleft[2] + downright[2] + 2;
+			answer[3] = upleft[3] + upright[3] + downleft[3] + downright[3] + 2;
+			answer[0] /= 4;
+			answer[1] /= 4;
+			answer[2] /= 4;
+			answer[3] /= 4;
+
+			*(dest + 0) = answer[0];
+			*(dest + 1) = answer[1];
+			*(dest + 2) = answer[2];
+			*(dest + 3) = answer[3];
+
+			src += 8;
+			dest += 4;
+		}
+		src += 8*pixWidth;
+    }
+
    #endif
     }
     else
     { // NEAREST-NEIGHBOUR but with border preserving
        ULONG ulRowModulo = pixWidth*2 *BYTES_PER_TEXEL;
 
-   #if (defined USE_PORTABLE_C)
-
-     PIX offset = 0;
-     ulRowModulo /= 4;
-
-     for (int q = 0; q < 2; q++)
-     {
-         for (PIX i = pixHeight / 2; i > 0; i--)
-         {
-             for (PIX j = pixWidth / 2; j > 0; j--)
-             {
-                 *pulDstMipmap = *(pulSrcMipmap + offset);
-                 pulSrcMipmap += 2;
-                 pulDstMipmap++;
-             }
-
-             for (PIX j = pixWidth / 2; j > 0; j--)
-             {
-                 *pulDstMipmap = *(pulSrcMipmap + offset + 1);
-                 pulSrcMipmap += 2;
-                 pulDstMipmap++;
-             }
-
-             pulSrcMipmap += ulRowModulo;
-        }
-
-        offset = pixWidth * 2;
-     }
-
-   #elif (defined __MSVC_INLINE__)
+   #if (defined __MSVC_INLINE__)
     __asm {
       xor     ebx,ebx
       mov     esi,D [pulSrcMipmap]
@@ -493,7 +462,33 @@ fullEnd:
     );
 
    #else
-     #error Write inline asm for your platform.
+     PIX offset = 0;
+     ulRowModulo /= 4;
+
+     for (int q = 0; q < 2; q++)
+     {
+         for (PIX i = pixHeight / 2; i > 0; i--)
+         {
+             for (PIX j = pixWidth / 2; j > 0; j--)
+             {
+                 *pulDstMipmap = *(pulSrcMipmap + offset);
+                 pulSrcMipmap += 2;
+                 pulDstMipmap++;
+             }
+
+             for (PIX j = pixWidth / 2; j > 0; j--)
+             {
+                 *pulDstMipmap = *(pulSrcMipmap + offset + 1);
+                 pulSrcMipmap += 2;
+                 pulDstMipmap++;
+             }
+
+             pulSrcMipmap += ulRowModulo;
+        }
+
+        offset = pixWidth * 2;
+     }
+
    #endif
   }
 }
@@ -649,7 +644,7 @@ __int64 mmShifter = 0;
 __int64 mmMask  = 0;
 ULONG *pulDitherTable;
 
-#ifdef USE_PORTABLE_C
+#if !(defined __MSVC_INLINE__) && !(defined __GNU_INLINE_X86_32__)
 extern const UBYTE *pubClipByte;
 // increment a byte without overflowing it
 static inline void IncrementByteWithClip( UBYTE &ub, SLONG slAdd)
@@ -778,35 +773,7 @@ void DitherBitmap( INDEX iDitherType, ULONG *pulSrc, ULONG *pulDst, PIX pixWidth
 // ------------------------------- ordered matrix dithering routine
 
 ditherOrder:
-#if (defined USE_PORTABLE_C)
-  union uConv
-  {
-    ULONG val;
-    DWORD dwords[2];
-    UWORD words[4];
-    WORD  iwords[4];
-    UBYTE bytes[8];
-  };
-  for (int i=0; i<pixHeight; i++) {
-    int idx = i&3;
-    uConv dith;
-    dith.val = pulDitherTable[idx];
-    for (int j=0; j<4; j++) { dith.words[j] >>= mmShifter; }
-    dith.val &= mmMask;
-    uConv* src = (uConv*)(pulSrc+i*pixWidth);
-    uConv* dst = (uConv*)(pulDst+i*pixWidth);
-    for (int j=0; j<pixWidth; j+=2) {
-      uConv p=src[0];
-      for (int k=0; k<8; k++) {
-        IncrementByteWithClip(p.bytes[k], dith.bytes[k]);
-      }
-      dst[0] = p;
-      src++;
-      dst++;
-    }
-  }
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     mov     esi,D [pulSrc]
     mov     edi,D [pulDst]
@@ -912,7 +879,33 @@ nextRowO:
   );
 
 #else
-  #error Write inline asm for your platform.
+  union uConv
+  {
+    ULONG val;
+    DWORD dwords[2];
+    UWORD words[4];
+    WORD  iwords[4];
+    UBYTE bytes[8];
+  };
+  for (int i=0; i<pixHeight; i++) {
+    int idx = i&3;
+    uConv dith;
+    dith.val = pulDitherTable[idx];
+    for (int j=0; j<4; j++) { dith.words[j] >>= mmShifter; }
+    dith.val &= mmMask;
+    uConv* src = (uConv*)(pulSrc+i*pixWidth);
+    uConv* dst = (uConv*)(pulDst+i*pixWidth);
+    for (int j=0; j<pixWidth; j+=2) {
+      uConv p=src[0];
+      for (int k=0; k<8; k++) {
+        IncrementByteWithClip(p.bytes[k], dith.bytes[k]);
+      }
+      dst[0] = p;
+      src++;
+      dst++;
+    }
+  }
+
 #endif
 
   goto theEnd;
@@ -924,34 +917,7 @@ ditherError:
   if( pulDst!=pulSrc) memcpy( pulDst, pulSrc, pixCanvasWidth*pixCanvasHeight *BYTES_PER_TEXEL);
   // slModulo+=4;
   // now, dither destination
-#if (defined USE_PORTABLE_C)
-  #if 1 //SEB doesn't works....
-  for (int i=0; i<pixHeight-1; i++) {
-    int step = (i&1)?-4:+4;
-    const UBYTE ubMask = (mmErrDiffMask&0xff);
-    UBYTE *src = ((UBYTE*)pulDst)+i*pixCanvasWidth*4;
-    if(i&1) src+=pixWidth*4;
-    // left to right or right to left
-    for (int j=0; j<pixWidth-1; j++) {
-      uConv p1, p3, p5, p7;
-      src+=step;
-      for (int k=0; k<4; k++) { p1.words[k] = src[k]&ubMask; }
-      //p1.val &= mmErrDiffMask;
-      for (int k=0; k<4; k++) { p3.words[k] = (p1.words[k]*3)>>4;
-                                p5.words[k] = (p1.words[k]*5)>>4;
-                                p7.words[k] = (p1.words[k]*7)>>4; }
-      for (int k=0; k<4; k++) { p1.words[k] -= (p3.words[k] + p5.words[k] + p7.words[k]);}
-      for (int k=0; k<4; k++) { 
-        IncrementByteWithClip( src[k + step]                 , p7.words[k]);
-        IncrementByteWithClip( src[pixCanvasWidth*4 -step +k], p5.words[k]);
-        IncrementByteWithClip( src[pixCanvasWidth*4 +0    +k], p3.words[k]);
-        IncrementByteWithClip( src[pixCanvasWidth*4 +step +k], p1.words[k]);
-      }
-    }
-  }
-  #endif
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     pxor    mm0,mm0
     mov     esi,D [pulDst]
@@ -1157,7 +1123,32 @@ allDoneE:
   );
 
 #else
-  #error Write inline asm for your platform.
+  #if 1 //SEB doesn't works....
+  for (int i=0; i<pixHeight-1; i++) {
+    int step = (i&1)?-4:+4;
+    const UBYTE ubMask = (mmErrDiffMask&0xff);
+    UBYTE *src = ((UBYTE*)pulDst)+i*pixCanvasWidth*4;
+    if(i&1) src+=pixWidth*4;
+    // left to right or right to left
+    for (int j=0; j<pixWidth-1; j++) {
+      uConv p1, p3, p5, p7;
+      src+=step;
+      for (int k=0; k<4; k++) { p1.words[k] = src[k]&ubMask; }
+      //p1.val &= mmErrDiffMask;
+      for (int k=0; k<4; k++) { p3.words[k] = (p1.words[k]*3)>>4;
+                                p5.words[k] = (p1.words[k]*5)>>4;
+                                p7.words[k] = (p1.words[k]*7)>>4; }
+      for (int k=0; k<4; k++) { p1.words[k] -= (p3.words[k] + p5.words[k] + p7.words[k]);}
+      for (int k=0; k<4; k++) { 
+        IncrementByteWithClip( src[k + step]                 , p7.words[k]);
+        IncrementByteWithClip( src[pixCanvasWidth*4 -step +k], p5.words[k]);
+        IncrementByteWithClip( src[pixCanvasWidth*4 +0    +k], p3.words[k]);
+        IncrementByteWithClip( src[pixCanvasWidth*4 +step +k], p1.words[k]);
+      }
+    }
+  }
+  #endif
+
 #endif
 
   goto theEnd;
@@ -1265,7 +1256,7 @@ extern "C" {
 }
 
 
-#ifdef USE_PORTABLE_C
+#if !(defined USE_MMX_INTRINSICS) && !(defined __MSVC_INLINE__) && !(defined __GNU_INLINE_X86_32__)
 typedef SWORD ExtPix[4];
 
 static inline void extpix_fromi64(ExtPix &pix, const __int64 i64)
@@ -1632,265 +1623,6 @@ void FilterBitmap( INDEX iFilter, ULONG *pulSrc, ULONG *pulDst, PIX pixWidth, PI
     _mm_empty();  // we're done, clear out the MMX registers!
 
 
-#elif (defined USE_PORTABLE_C)
-    slModulo1 /= BYTES_PER_TEXEL;  // C++ handles incrementing by sizeof type
-    slCanvasWidth /= BYTES_PER_TEXEL;  // C++ handles incrementing by sizeof type
-
-    ULONG *src = pulSrc;
-    ULONG *dst = pulDst;
-    ULONG *rowptr = aulRows;
-
-    ExtPix rmm1={0}, rmm2={0}, rmm3={0}, rmm4={0}, rmm5={0}, rmm6={0}, rmm7={0};
-    #define EXTPIXFROMINT64(x) ExtPix r##x; extpix_fromi64(r##x, x);
-    EXTPIXFROMINT64(mmCm);
-    EXTPIXFROMINT64(mmCe);
-    EXTPIXFROMINT64(mmCc);
-    EXTPIXFROMINT64(mmEch);
-    EXTPIXFROMINT64(mmEcl);
-    EXTPIXFROMINT64(mmEe);
-    EXTPIXFROMINT64(mmEm);
-    EXTPIXFROMINT64(mmMm);
-    EXTPIXFROMINT64(mmMe);
-    EXTPIXFROMINT64(mmMc);
-    EXTPIXFROMINT64(mmAdd);
-    EXTPIXFROMINT64(mmInvDiv);
-    #undef EXTPIXFROMINT64
-
-    // ----------------------- process upper left corner
-    extend_pixel(src[0], rmm1);
-    extend_pixel(src[1], rmm2);
-    extend_pixel(src[pixCanvasWidth], rmm3);
-    extend_pixel(src[pixCanvasWidth+1], rmm4);
-
-    extpix_add(rmm2, rmm3);
-    extpix_mul(rmm1, rmmCm);
-    extpix_mul(rmm2, rmmCe);
-    extpix_mul(rmm4, rmmCc);
-    extpix_add(rmm1, rmm2);
-    extpix_add(rmm1, rmm4);
-    extpix_adds(rmm1, rmmAdd);
-    extpix_mulhi(rmm1, rmmInvDiv);
-    *(rowptr++) = unextend_pixel(rmm1);
-    
-    src++;
-
-    // ----------------------- process upper edge pixels
-    for (PIX i = pixWidth - 2; i != 0; i--)
-    {
-        extend_pixel(src[-1], rmm1);
-        extend_pixel(src[0], rmm2);
-        extend_pixel(src[1], rmm3);
-        extend_pixel(src[pixCanvasWidth-1], rmm4);
-        extend_pixel(src[pixCanvasWidth], rmm5);
-        extend_pixel(src[pixCanvasWidth+1], rmm6);
-
-        extpix_add(rmm1, rmm3);
-        extpix_add(rmm4, rmm6);
-        extpix_mul(rmm1, rmmEch);
-        extpix_mul(rmm2, rmmEm);
-        extpix_mul(rmm4, rmmEcl);
-        extpix_mul(rmm5, rmmEe);
-        extpix_add(rmm1, rmm2);
-        extpix_add(rmm1, rmm4);
-        extpix_add(rmm1, rmm5);
-        extpix_adds(rmm1, rmmAdd);
-        extpix_mulhi(rmm1, rmmInvDiv);
-        *(rowptr++) = unextend_pixel(rmm1);
-        src++;
-    }
-
-    // ----------------------- process upper right corner
-
-    extend_pixel(src[-1], rmm1);
-    extend_pixel(src[0], rmm2);
-    extend_pixel(src[pixCanvasWidth-1], rmm3);
-    extend_pixel(src[pixCanvasWidth], rmm4);
-
-    extpix_add(rmm1, rmm4);
-    extpix_mul(rmm1, rmmCe);
-    extpix_mul(rmm2, rmmCm);
-    extpix_mul(rmm3, rmmCc);
-    extpix_add(rmm1, rmm2);
-    extpix_add(rmm1, rmm3);
-    extpix_adds(rmm1, rmmAdd);
-    extpix_mulhi(rmm1, rmmInvDiv);
-    *rowptr = unextend_pixel(rmm1);
-
-// ----------------------- process bitmap middle pixels
-
-    dst += slCanvasWidth;
-    src += slModulo1;
-
-    // for each row
-    for (size_t i = pixHeight-2; i != 0; i--)  // rowLoop
-    {
-        rowptr = aulRows;
-
-        // process left edge pixel
-        extend_pixel(src[-pixCanvasWidth], rmm1);
-        extend_pixel(src[(-pixCanvasWidth)+1], rmm2);
-        extend_pixel(src[0], rmm3);
-        extend_pixel(src[1], rmm4);
-        extend_pixel(src[pixCanvasWidth], rmm5);
-        extend_pixel(src[pixCanvasWidth+1], rmm6);
-
-        extpix_add(rmm1, rmm5);
-        extpix_add(rmm2, rmm6);
-        extpix_mul(rmm1, rmmEch);
-        extpix_mul(rmm2, rmmEcl);
-        extpix_mul(rmm3, rmmEm);
-        extpix_mul(rmm4, rmmEe);
-        extpix_add(rmm1, rmm2);
-        extpix_add(rmm1, rmm3);
-        extpix_add(rmm1, rmm4);
-        extpix_adds(rmm1, rmmAdd);
-        extpix_mulhi(rmm1, rmmInvDiv);
-        dst[-pixCanvasWidth] = *rowptr;
-        *(rowptr++) = unextend_pixel(rmm1);
-        src++;
-        dst++;
-
-        // for each pixel in current row
-        for (size_t j = pixWidth-2; j != 0; j--)  // pixLoop
-        {
-            // prepare upper convolution row
-            extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
-            extend_pixel(src[-pixCanvasWidth], rmm2);
-            extend_pixel(src[(-pixCanvasWidth)+1], rmm3);
-
-            // prepare middle convolution row
-            extend_pixel(src[-1], rmm4);
-            extend_pixel(src[0], rmm5);
-            extend_pixel(src[1], rmm6);
-
-            // free some registers
-            extpix_add(rmm1, rmm3);
-            extpix_add(rmm2, rmm4);
-            extpix_mul(rmm5, rmmMm);
-
-            // prepare lower convolution row
-            extend_pixel(src[pixCanvasWidth-1], rmm3);
-            extend_pixel(src[pixCanvasWidth], rmm4);
-            extend_pixel(src[pixCanvasWidth+1], rmm7);
-
-            // calc weightened value
-            extpix_add(rmm2, rmm6);
-            extpix_add(rmm1, rmm3);
-            extpix_add(rmm2, rmm4);
-            extpix_add(rmm1, rmm7);
-            extpix_mul(rmm2, rmmMe);
-            extpix_mul(rmm1, rmmMc);
-            extpix_add(rmm2, rmm5);
-            extpix_add(rmm1, rmm2);
-
-            // calc and store wightened value
-            extpix_adds(rmm1, rmmAdd);
-            extpix_mulhi(rmm1, rmmInvDiv);
-            dst[-pixCanvasWidth] = *rowptr;
-            *(rowptr++) = unextend_pixel(rmm1);
-
-            // advance to next pixel
-            src++;
-            dst++;
-        }
-
-        // process right edge pixel
-        extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
-        extend_pixel(src[-pixCanvasWidth], rmm2);
-        extend_pixel(src[-1], rmm3);
-        extend_pixel(src[0], rmm4);
-        extend_pixel(src[pixCanvasWidth-1], rmm5);
-        extend_pixel(src[pixCanvasWidth], rmm6);
-
-        extpix_add(rmm1, rmm5);
-        extpix_add(rmm2, rmm6);
-        extpix_mul(rmm1, rmmEcl);
-        extpix_mul(rmm2, rmmEch);
-        extpix_mul(rmm3, rmmEe);
-        extpix_mul(rmm4, rmmEm);
-        extpix_add(rmm1, rmm2);
-        extpix_add(rmm1, rmm3);
-        extpix_add(rmm1, rmm4);
-        extpix_adds(rmm1, rmmAdd);
-        extpix_mulhi(rmm1, rmmInvDiv);
-        dst[-pixCanvasWidth] = *rowptr;
-        *rowptr = unextend_pixel(rmm1);
-
-        // advance to next row
-        src += slModulo1;
-        dst += slModulo1;
-    }
-
-    // ----------------------- process lower left corner
-    rowptr = aulRows;
-    extend_pixel(src[-pixCanvasWidth], rmm1);
-    extend_pixel(src[(-pixCanvasWidth)+1], rmm2);
-    extend_pixel(src[0], rmm3);
-    extend_pixel(src[1], rmm4);
-
-    extpix_add(rmm1, rmm4);
-    extpix_mul(rmm1, rmmCe);
-    extpix_mul(rmm2, rmmCc);
-    extpix_mul(rmm3, rmmCm);
-    extpix_add(rmm1, rmm2);
-    extpix_add(rmm1, rmm3);
-    extpix_adds(rmm1, rmmAdd);
-    extpix_mulhi(rmm1, rmmInvDiv);
-    dst[-pixCanvasWidth] = *rowptr;
-    dst[0] = unextend_pixel(rmm1);
-
-    src++;
-    dst++;
-    rowptr++;
-
-    // ----------------------- process lower edge pixels
-    for (size_t i = pixWidth-2; i != 0; i--)  // lowerLoop
-    {
-        // for each pixel
-        extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
-        extend_pixel(src[-pixCanvasWidth], rmm2);
-        extend_pixel(src[(-pixCanvasWidth)+1], rmm3);
-        extend_pixel(src[-1], rmm4);
-        extend_pixel(src[0], rmm5);
-        extend_pixel(src[1], rmm6);
-
-        extpix_add(rmm1, rmm3);
-        extpix_add(rmm4, rmm6);
-        extpix_mul(rmm1, rmmEcl);
-        extpix_mul(rmm2, rmmEe);
-        extpix_mul(rmm4, rmmEch);
-        extpix_mul(rmm5, rmmEm);
-        extpix_add(rmm1, rmm2);
-        extpix_add(rmm1, rmm4);
-        extpix_add(rmm1, rmm5);
-        extpix_adds(rmm1, rmmAdd);
-        extpix_mulhi(rmm1, rmmInvDiv);
-        dst[-pixCanvasWidth] = *rowptr;
-        dst[0] = unextend_pixel(rmm1);
-
-        // advance to next pixel
-        src++;
-        dst++;
-        rowptr++;
-    }
-
-    // ----------------------- lower right corners
-    extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
-    extend_pixel(src[-pixCanvasWidth], rmm2);
-    extend_pixel(src[-1], rmm3);
-    extend_pixel(src[0], rmm4);
-
-    extpix_add(rmm2, rmm3);
-    extpix_mul(rmm1, rmmCc);
-    extpix_mul(rmm2, rmmCe);
-    extpix_mul(rmm4, rmmCm);
-    extpix_add(rmm1, rmm2);
-    extpix_add(rmm1, rmm4);
-    extpix_adds(rmm1, rmmAdd);
-    extpix_mulhi(rmm1, rmmInvDiv);
-    dst[-pixCanvasWidth] = *rowptr;
-    dst[0] = unextend_pixel(rmm1);
-
 #elif (defined __MSVC_INLINE__)
   __asm {
     cld
@@ -2537,7 +2269,264 @@ lowerLoop:
   );
 
 #else
-  #error Write inline asm for your platform.
+    slModulo1 /= BYTES_PER_TEXEL;  // C++ handles incrementing by sizeof type
+    slCanvasWidth /= BYTES_PER_TEXEL;  // C++ handles incrementing by sizeof type
+
+    ULONG *src = pulSrc;
+    ULONG *dst = pulDst;
+    ULONG *rowptr = aulRows;
+
+    ExtPix rmm1={0}, rmm2={0}, rmm3={0}, rmm4={0}, rmm5={0}, rmm6={0}, rmm7={0};
+    #define EXTPIXFROMINT64(x) ExtPix r##x; extpix_fromi64(r##x, x);
+    EXTPIXFROMINT64(mmCm);
+    EXTPIXFROMINT64(mmCe);
+    EXTPIXFROMINT64(mmCc);
+    EXTPIXFROMINT64(mmEch);
+    EXTPIXFROMINT64(mmEcl);
+    EXTPIXFROMINT64(mmEe);
+    EXTPIXFROMINT64(mmEm);
+    EXTPIXFROMINT64(mmMm);
+    EXTPIXFROMINT64(mmMe);
+    EXTPIXFROMINT64(mmMc);
+    EXTPIXFROMINT64(mmAdd);
+    EXTPIXFROMINT64(mmInvDiv);
+    #undef EXTPIXFROMINT64
+
+    // ----------------------- process upper left corner
+    extend_pixel(src[0], rmm1);
+    extend_pixel(src[1], rmm2);
+    extend_pixel(src[pixCanvasWidth], rmm3);
+    extend_pixel(src[pixCanvasWidth+1], rmm4);
+
+    extpix_add(rmm2, rmm3);
+    extpix_mul(rmm1, rmmCm);
+    extpix_mul(rmm2, rmmCe);
+    extpix_mul(rmm4, rmmCc);
+    extpix_add(rmm1, rmm2);
+    extpix_add(rmm1, rmm4);
+    extpix_adds(rmm1, rmmAdd);
+    extpix_mulhi(rmm1, rmmInvDiv);
+    *(rowptr++) = unextend_pixel(rmm1);
+    
+    src++;
+
+    // ----------------------- process upper edge pixels
+    for (PIX i = pixWidth - 2; i != 0; i--)
+    {
+        extend_pixel(src[-1], rmm1);
+        extend_pixel(src[0], rmm2);
+        extend_pixel(src[1], rmm3);
+        extend_pixel(src[pixCanvasWidth-1], rmm4);
+        extend_pixel(src[pixCanvasWidth], rmm5);
+        extend_pixel(src[pixCanvasWidth+1], rmm6);
+
+        extpix_add(rmm1, rmm3);
+        extpix_add(rmm4, rmm6);
+        extpix_mul(rmm1, rmmEch);
+        extpix_mul(rmm2, rmmEm);
+        extpix_mul(rmm4, rmmEcl);
+        extpix_mul(rmm5, rmmEe);
+        extpix_add(rmm1, rmm2);
+        extpix_add(rmm1, rmm4);
+        extpix_add(rmm1, rmm5);
+        extpix_adds(rmm1, rmmAdd);
+        extpix_mulhi(rmm1, rmmInvDiv);
+        *(rowptr++) = unextend_pixel(rmm1);
+        src++;
+    }
+
+    // ----------------------- process upper right corner
+
+    extend_pixel(src[-1], rmm1);
+    extend_pixel(src[0], rmm2);
+    extend_pixel(src[pixCanvasWidth-1], rmm3);
+    extend_pixel(src[pixCanvasWidth], rmm4);
+
+    extpix_add(rmm1, rmm4);
+    extpix_mul(rmm1, rmmCe);
+    extpix_mul(rmm2, rmmCm);
+    extpix_mul(rmm3, rmmCc);
+    extpix_add(rmm1, rmm2);
+    extpix_add(rmm1, rmm3);
+    extpix_adds(rmm1, rmmAdd);
+    extpix_mulhi(rmm1, rmmInvDiv);
+    *rowptr = unextend_pixel(rmm1);
+
+// ----------------------- process bitmap middle pixels
+
+    dst += slCanvasWidth;
+    src += slModulo1;
+
+    // for each row
+    for (size_t i = pixHeight-2; i != 0; i--)  // rowLoop
+    {
+        rowptr = aulRows;
+
+        // process left edge pixel
+        extend_pixel(src[-pixCanvasWidth], rmm1);
+        extend_pixel(src[(-pixCanvasWidth)+1], rmm2);
+        extend_pixel(src[0], rmm3);
+        extend_pixel(src[1], rmm4);
+        extend_pixel(src[pixCanvasWidth], rmm5);
+        extend_pixel(src[pixCanvasWidth+1], rmm6);
+
+        extpix_add(rmm1, rmm5);
+        extpix_add(rmm2, rmm6);
+        extpix_mul(rmm1, rmmEch);
+        extpix_mul(rmm2, rmmEcl);
+        extpix_mul(rmm3, rmmEm);
+        extpix_mul(rmm4, rmmEe);
+        extpix_add(rmm1, rmm2);
+        extpix_add(rmm1, rmm3);
+        extpix_add(rmm1, rmm4);
+        extpix_adds(rmm1, rmmAdd);
+        extpix_mulhi(rmm1, rmmInvDiv);
+        dst[-pixCanvasWidth] = *rowptr;
+        *(rowptr++) = unextend_pixel(rmm1);
+        src++;
+        dst++;
+
+        // for each pixel in current row
+        for (size_t j = pixWidth-2; j != 0; j--)  // pixLoop
+        {
+            // prepare upper convolution row
+            extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
+            extend_pixel(src[-pixCanvasWidth], rmm2);
+            extend_pixel(src[(-pixCanvasWidth)+1], rmm3);
+
+            // prepare middle convolution row
+            extend_pixel(src[-1], rmm4);
+            extend_pixel(src[0], rmm5);
+            extend_pixel(src[1], rmm6);
+
+            // free some registers
+            extpix_add(rmm1, rmm3);
+            extpix_add(rmm2, rmm4);
+            extpix_mul(rmm5, rmmMm);
+
+            // prepare lower convolution row
+            extend_pixel(src[pixCanvasWidth-1], rmm3);
+            extend_pixel(src[pixCanvasWidth], rmm4);
+            extend_pixel(src[pixCanvasWidth+1], rmm7);
+
+            // calc weighted value
+            extpix_add(rmm2, rmm6);
+            extpix_add(rmm1, rmm3);
+            extpix_add(rmm2, rmm4);
+            extpix_add(rmm1, rmm7);
+            extpix_mul(rmm2, rmmMe);
+            extpix_mul(rmm1, rmmMc);
+            extpix_add(rmm2, rmm5);
+            extpix_add(rmm1, rmm2);
+
+            // calc and store weighted value
+            extpix_adds(rmm1, rmmAdd);
+            extpix_mulhi(rmm1, rmmInvDiv);
+            dst[-pixCanvasWidth] = *rowptr;
+            *(rowptr++) = unextend_pixel(rmm1);
+
+            // advance to next pixel
+            src++;
+            dst++;
+        }
+
+        // process right edge pixel
+        extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
+        extend_pixel(src[-pixCanvasWidth], rmm2);
+        extend_pixel(src[-1], rmm3);
+        extend_pixel(src[0], rmm4);
+        extend_pixel(src[pixCanvasWidth-1], rmm5);
+        extend_pixel(src[pixCanvasWidth], rmm6);
+
+        extpix_add(rmm1, rmm5);
+        extpix_add(rmm2, rmm6);
+        extpix_mul(rmm1, rmmEcl);
+        extpix_mul(rmm2, rmmEch);
+        extpix_mul(rmm3, rmmEe);
+        extpix_mul(rmm4, rmmEm);
+        extpix_add(rmm1, rmm2);
+        extpix_add(rmm1, rmm3);
+        extpix_add(rmm1, rmm4);
+        extpix_adds(rmm1, rmmAdd);
+        extpix_mulhi(rmm1, rmmInvDiv);
+        dst[-pixCanvasWidth] = *rowptr;
+        *rowptr = unextend_pixel(rmm1);
+
+        // advance to next row
+        src += slModulo1;
+        dst += slModulo1;
+    }
+
+    // ----------------------- process lower left corner
+    rowptr = aulRows;
+    extend_pixel(src[-pixCanvasWidth], rmm1);
+    extend_pixel(src[(-pixCanvasWidth)+1], rmm2);
+    extend_pixel(src[0], rmm3);
+    extend_pixel(src[1], rmm4);
+
+    extpix_add(rmm1, rmm4);
+    extpix_mul(rmm1, rmmCe);
+    extpix_mul(rmm2, rmmCc);
+    extpix_mul(rmm3, rmmCm);
+    extpix_add(rmm1, rmm2);
+    extpix_add(rmm1, rmm3);
+    extpix_adds(rmm1, rmmAdd);
+    extpix_mulhi(rmm1, rmmInvDiv);
+    dst[-pixCanvasWidth] = *rowptr;
+    dst[0] = unextend_pixel(rmm1);
+
+    src++;
+    dst++;
+    rowptr++;
+
+    // ----------------------- process lower edge pixels
+    for (size_t i = pixWidth-2; i != 0; i--)  // lowerLoop
+    {
+        // for each pixel
+        extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
+        extend_pixel(src[-pixCanvasWidth], rmm2);
+        extend_pixel(src[(-pixCanvasWidth)+1], rmm3);
+        extend_pixel(src[-1], rmm4);
+        extend_pixel(src[0], rmm5);
+        extend_pixel(src[1], rmm6);
+
+        extpix_add(rmm1, rmm3);
+        extpix_add(rmm4, rmm6);
+        extpix_mul(rmm1, rmmEcl);
+        extpix_mul(rmm2, rmmEe);
+        extpix_mul(rmm4, rmmEch);
+        extpix_mul(rmm5, rmmEm);
+        extpix_add(rmm1, rmm2);
+        extpix_add(rmm1, rmm4);
+        extpix_add(rmm1, rmm5);
+        extpix_adds(rmm1, rmmAdd);
+        extpix_mulhi(rmm1, rmmInvDiv);
+        dst[-pixCanvasWidth] = *rowptr;
+        dst[0] = unextend_pixel(rmm1);
+
+        // advance to next pixel
+        src++;
+        dst++;
+        rowptr++;
+    }
+
+    // ----------------------- process lower right corner
+    extend_pixel(src[(-pixCanvasWidth)-1], rmm1);
+    extend_pixel(src[-pixCanvasWidth], rmm2);
+    extend_pixel(src[-1], rmm3);
+    extend_pixel(src[0], rmm4);
+
+    extpix_add(rmm2, rmm3);
+    extpix_mul(rmm1, rmmCc);
+    extpix_mul(rmm2, rmmCe);
+    extpix_mul(rmm4, rmmCm);
+    extpix_add(rmm1, rmm2);
+    extpix_add(rmm1, rmm4);
+    extpix_adds(rmm1, rmmAdd);
+    extpix_mulhi(rmm1, rmmInvDiv);
+    dst[-pixCanvasWidth] = *rowptr;
+    dst[0] = unextend_pixel(rmm1);
+
 #endif
 
   // all done (finally)
diff --git a/Sources/Engine/Graphics/OpenGL.h b/Sources/Engine/Graphics/OpenGL.h
index fe3f137..37b5038 100644
--- a/Sources/Engine/Graphics/OpenGL.h
+++ b/Sources/Engine/Graphics/OpenGL.h
@@ -89,13 +89,7 @@ extern void  (__stdcall *pglPNTrianglesfATI)( GLenum pname, GLfloat param);
 inline void glCOLOR( COLOR col)
 {
 /* rcg10052001 Platform-wrappers. */
-#if (defined USE_PORTABLE_C)
-	col = ( ((col << 24)            ) |
-            ((col << 8) & 0x00FF0000) |
-            ((col >> 8) & 0x0000FF00) |
-            ((col >> 24)            ) );
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     mov     eax,dword ptr [col]
     bswap   eax
@@ -110,7 +104,11 @@ inline void glCOLOR( COLOR col)
   );
 
 #else
-  #error please define for your platform.
+  col = ( ((col << 24)            ) |
+          ((col << 8) & 0x00FF0000) |
+          ((col >> 8) & 0x0000FF00) |
+          ((col >> 24)            ) );
+
 #endif
 
   pglColor4ubv((GLubyte*)&col);
diff --git a/Sources/Engine/Graphics/TextureEffects.cpp b/Sources/Engine/Graphics/TextureEffects.cpp
index b50a33e..91fc839 100644
--- a/Sources/Engine/Graphics/TextureEffects.cpp
+++ b/Sources/Engine/Graphics/TextureEffects.cpp
@@ -32,9 +32,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 #define W  word ptr
 #define B  byte ptr
 
-#if (defined USE_PORTABLE_C)
-#define ASMOPT 0
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
 #define ASMOPT 1
 #elif (defined __GNU_INLINE_X86_32__)
 #define ASMOPT 1
@@ -1285,8 +1283,7 @@ static void RenderWater(void)
   { // SUB-SAMPLING
     SLONG slHeightMapStep, slHeightRowStep;
 
-#if ASMOPT == 1
-  #if (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
     __asm {
       push    ebx
       bsf     ecx,D [_pixTexWidth]
@@ -1357,7 +1354,7 @@ pixLoop:
       pop     ebx
     }
 
-  #elif (defined __GNU_INLINE_X86_32__)
+#elif (defined __GNU_INLINE_X86_32__)
     // rcg12152001 needed extra registers. :(
     _slHeightMapStep_renderWater = slHeightMapStep;
     _pixBaseWidth_renderWater = pixBaseWidth;
@@ -1460,10 +1457,6 @@ pixLoop:
           "cc", "memory"
     );
 
-  #else
-    #error fill in for your platform.
-  #endif
-
 #else
 
     PIX pixPos, pixDU, pixDV;
diff --git a/Sources/Engine/Light/LayerMixer.cpp b/Sources/Engine/Light/LayerMixer.cpp
index 0f1b8d0..26c0e51 100755
--- a/Sources/Engine/Light/LayerMixer.cpp
+++ b/Sources/Engine/Light/LayerMixer.cpp
@@ -40,16 +40,6 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 #define W  word ptr
 #define B  byte ptr
 
-#if (defined USE_PORTABLE_C)
-  #define ASMOPT 0
-#elif (defined __MSVC_INLINE__)
-  #define ASMOPT 1
-#elif (defined __GNU_INLINE_X86_32__)
-  #define ASMOPT 1
-#else
-  #define ASMOPT 0
-#endif
-
 extern INDEX shd_bFineQuality;
 extern INDEX shd_iFiltering;
 extern INDEX shd_iDithering;
@@ -290,8 +280,7 @@ void CLayerMixer::AddAmbientPoint(void)
   _slLightMax<<=7;
   _slLightStep>>=1;
 
-#if (ASMOPT == 1)
- #if (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     // prepare interpolants
     movd    mm0,D [_slL2Row]
@@ -364,7 +353,7 @@ skipPixel:
     emms
   }
 
- #elif (defined __GNU_INLINE_X86_32__)
+#elif (defined __GNU_INLINE_X86_32__)
   ULONG tmp1, tmp2;
   __asm__ __volatile__ (
     // prepare interpolants
@@ -439,10 +428,6 @@ skipPixel:
         : FPU_REGS, MMX_REGS, "eax", "ecx", "edi", "cc", "memory"
   );
 
- #else
-  #error Write inline asm for your platform.
- #endif
-
 #else
 
     // !!! FIXME WARNING: I have not checked this code, and it could be
@@ -496,8 +481,7 @@ void CLayerMixer::AddAmbientMaskPoint( UBYTE *pubMask, UBYTE ubMask)
   _slLightStep>>=1;
 
 
-#if (ASMOPT == 1)
- #if (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     // prepare interpolants
     movd    mm0,D [_slL2Row]
@@ -576,7 +560,7 @@ skipPixel:
     emms
   }
 
- #elif (defined __GNU_INLINE_X86_32__)
+#elif (defined __GNU_INLINE_X86_32__)
   ULONG tmp1, tmp2;
   __asm__ __volatile__ (
     // prepare interpolants
@@ -660,10 +644,6 @@ skipPixel:
           "cc", "memory"
   );
 
- #else
-  #error Please write inline assembly for your platform.
- #endif
-
 #else   // Portable C version...
 
   UBYTE* pubLayer = (UBYTE*)_pulLayer;
@@ -723,8 +703,7 @@ void CLayerMixer::AddDiffusionPoint(void)
   _slLightMax<<=7;
   _slLightStep>>=1;
 
-#if ASMOPT == 1
- #if (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     // prepare interpolants
     movd    mm0,D [_slL2Row]
@@ -796,7 +775,7 @@ skipPixel:
     emms
   }
 
- #elif (defined __GNU_INLINE_X86_32__)
+#elif (defined __GNU_INLINE_X86_32__)
   ULONG tmp1, tmp2;
   __asm__ __volatile__ (
     // prepare interpolants
@@ -871,10 +850,6 @@ skipPixel:
         : FPU_REGS, MMX_REGS, "eax", "ecx", "edi", "cc", "memory"
   );
 
- #else
-  #error Write inline assembly for your platform.
- #endif
-
 #else
   // for each pixel in the shadow map
   UBYTE* pubLayer = (UBYTE*)_pulLayer;
@@ -929,8 +904,7 @@ void CLayerMixer::AddDiffusionMaskPoint( UBYTE *pubMask, UBYTE ubMask)
   _slLightMax<<=7;
   _slLightStep>>=1;
 
-#if (ASMOPT == 1)
- #if (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     // prepare interpolants
     movd    mm0,D [_slL2Row]
@@ -1008,7 +982,7 @@ skipPixel:
     emms
   }
 
- #elif (defined __GNU_INLINE_X86_32__)
+#elif (defined __GNU_INLINE_X86_32__)
   ULONG tmp1, tmp2;
   __asm__ __volatile__ (
     // prepare interpolants
@@ -1091,11 +1065,6 @@ skipPixel:
           "cc", "memory"
   );
 
- #else
-  #error Write inline ASM for your platform.
-
- #endif
-
 #else
 
   // for each pixel in the shadow map
@@ -1201,8 +1170,7 @@ BOOL CLayerMixer::PrepareOneLayerPoint( CBrushShadowLayer *pbsl, BOOL bNoMask)
   FLOAT fDL2oDV     = fDDL2oDV + 2*(lm_vStepV%v00);
   //_v00 = v00;
 
-#if ((ASMOPT == 1) && (!defined __GNU_INLINE_X86_32__))
- #if (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     fld     D [fDDL2oDU]
     fadd    D [fDDL2oDU]
@@ -1230,12 +1198,6 @@ BOOL CLayerMixer::PrepareOneLayerPoint( CBrushShadowLayer *pbsl, BOOL bNoMask)
     fistp   D [_slDDL2oDV]
     fistp   D [_slDDL2oDU]
   }
- #elif (defined __GNU_INLINE_X86_32__)
-    STUBBED("inline asm.");
- #else
-   #error Please write inline assembly for your platform.
- #endif
-
 #else
   fDDL2oDU     *= 2;
   fDDL2oDV     *= 2;
@@ -1321,8 +1283,7 @@ void CLayerMixer::AddOneLayerGradient( CGradientParameters &gp)
   _pulLayer  = lm_pulShadowMap;
   FLOAT fStart = Clamp( fGr00-(fDGroDJ+fDGroDI)*0.5f, 0.0f, 1.0f);
 
-#if ((ASMOPT == 1) && (!defined __GNU_INLINE_X86_32__))
- #if (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __int64 mmRowAdv;
   SLONG fixGRow  = (fGr00-(fDGroDJ+fDGroDI)*0.5f)*32767.0f; // 16:15
   SLONG slModulo = (lm_pixCanvasSizeU-lm_pixPolygonSizeU) *BYTES_PER_TEXEL;
@@ -1436,14 +1397,6 @@ rowNext:
 rowDone:
     emms
   }
- #elif (defined __GNU_INLINE_X86_32__)
-
-    STUBBED("WRITE ME. Argh.");
-
- #else
-  #error Need inline assembly for your platform.
- #endif
-
 #else
   // well, make gradient ...
   SLONG slR0=0,slG0=0,slB0=0;
@@ -1528,9 +1481,8 @@ rowDone:
 // apply directional light or ambient to layer
 void CLayerMixer::AddDirectional(void)
 {
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
   ULONG ulLight = ByteSwap( lm_colLight);
- #if (defined __MSVC_INLINE__)
   __asm {
     // prepare pointers and variables
     mov     edi,D [_pulLayer]
@@ -1565,7 +1517,8 @@ rowNext:
     emms
   }
 
- #elif (defined __GNU_INLINE_X86_32__)
+#elif (defined __GNU_INLINE_X86_32__)
+  ULONG ulLight = ByteSwap( lm_colLight);
   ULONG tmp;
   __asm__ __volatile__ (
     // prepare pointers and variables
@@ -1608,10 +1561,6 @@ rowNext:
         : FPU_REGS, "mm5", "mm6", "ecx", "edi", "cc", "memory"
   );
 
- #else
-   #error Write inline assembly for your platform.
- #endif
-
 #else
   UBYTE* pubLayer = (UBYTE*)_pulLayer;
   // for each pixel in the shadow map
@@ -1631,9 +1580,8 @@ rowNext:
 // apply directional light thru mask to layer
 void CLayerMixer::AddMaskDirectional( UBYTE *pubMask, UBYTE ubMask)
 {
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
   ULONG ulLight = ByteSwap( lm_colLight);
- #if (defined __MSVC_INLINE__)
   // prepare some local variables
   __asm {
     // prepare pointers and variables
@@ -1665,7 +1613,8 @@ skipLight:
     emms
   }
 
- #elif (defined __GNU_INLINE_X86_32__)
+#elif (defined __GNU_INLINE_X86_32__)
+  ULONG ulLight = ByteSwap( lm_colLight);
   ULONG tmp;
   __asm__ __volatile__ (
     // prepare pointers and variables
@@ -1706,10 +1655,6 @@ skipLight:
           "cc", "memory"
   );
 
- #else
-  #error Please write inline assembly for your platform.
- #endif
-
 #else
   UBYTE* pubLayer = (UBYTE*)_pulLayer;
   // for each pixel in the shadow map
@@ -1832,7 +1777,33 @@ void CLayerMixer::MixOneMipmap(CBrushShadowMap *pbsm, INDEX iMipmap)
     }
   } // set initial color
 
- #if (defined USE_PORTABLE_C)
+#if (defined __MSVC_INLINE__)
+  __asm {
+    cld
+    mov     ebx,D [this]
+    mov     ecx,D [ebx].lm_pixCanvasSizeU
+    imul    ecx,D [ebx].lm_pixCanvasSizeV
+    mov     edi,D [ebx].lm_pulShadowMap
+    mov     eax,D [colAmbient]
+    bswap   eax
+    rep     stosd
+  }
+
+#elif (defined __GNU_INLINE_X86_32__)
+  ULONG clob1, clob2, clob3;
+  __asm__ __volatile__ (
+    "cld                    \n\t"
+    "imull   %%esi, %%ecx   \n\t"
+    "bswapl  %%eax          \n\t"
+    "rep                    \n\t"
+    "stosl                  \n\t"
+        : "=a" (clob1), "=c" (clob2), "=D" (clob3)
+        : "c" (this->lm_pixCanvasSizeU), "S" (this->lm_pixCanvasSizeV),
+          "a" (colAmbient), "D" (this->lm_pulShadowMap)
+        : "cc", "memory"
+  );
+
+#else
   register ULONG count = this->lm_pixCanvasSizeU * this->lm_pixCanvasSizeV;
   #if PLATFORM_LITTLEENDIAN
   // Forces C fallback; BYTESWAP itself is a no-op on little endian.
@@ -1850,35 +1821,7 @@ void CLayerMixer::MixOneMipmap(CBrushShadowMap *pbsm, INDEX iMipmap)
     ptr++;
   }
 
- #elif (defined __MSVC_INLINE__)
-  __asm {
-    cld
-    mov     ebx,D [this]
-    mov     ecx,D [ebx].lm_pixCanvasSizeU
-    imul    ecx,D [ebx].lm_pixCanvasSizeV
-    mov     edi,D [ebx].lm_pulShadowMap
-    mov     eax,D [colAmbient]
-    bswap   eax
-    rep     stosd
-  }
-
- #elif (defined __GNU_INLINE_X86_32__)
-  ULONG clob1, clob2, clob3;
-  __asm__ __volatile__ (
-    "cld                    \n\t"
-    "imull   %%esi, %%ecx   \n\t"
-    "bswapl  %%eax          \n\t"
-    "rep                    \n\t"
-    "stosl                  \n\t"
-        : "=a" (clob1), "=c" (clob2), "=D" (clob3)
-        : "c" (this->lm_pixCanvasSizeU), "S" (this->lm_pixCanvasSizeV),
-          "a" (colAmbient), "D" (this->lm_pulShadowMap)
-        : "cc", "memory"
-  );
-
- #else
-  #error Please write inline assembly for your platform.
- #endif
+#endif
 
   _pfWorldEditingProfile.StopTimer(CWorldEditingProfile::PTI_AMBIENTFILL);
 
@@ -1955,9 +1898,7 @@ void CLayerMixer::MixOneMipmap(CBrushShadowMap *pbsm, INDEX iMipmap)
 // copy from static shadow map to dynamic layer
 __forceinline void CLayerMixer::CopyShadowLayer(void)
 {
- #if (defined USE_PORTABLE_C)
-   memcpy(lm_pulShadowMap, lm_pulStaticShadowMap, lm_pixCanvasSizeU*lm_pixCanvasSizeV*4);
- #elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     cld
     mov     ebx,D [this]
@@ -1967,7 +1908,7 @@ __forceinline void CLayerMixer::CopyShadowLayer(void)
     mov     edi,D [ebx].lm_pulShadowMap
     rep     movsd
   }
- #elif (defined __GNU_INLINE_X86_32__)
+#elif (defined __GNU_INLINE_X86_32__)
   ULONG clob1, clob2, clob3;
   __asm__ __volatile__ (
     "cld                    \n\t"
@@ -1980,21 +1921,16 @@ __forceinline void CLayerMixer::CopyShadowLayer(void)
         : "cc", "memory"
   );
 
- #else
-  #error Please write inline assembly for your platform.
- #endif
+#else
+  memcpy(lm_pulShadowMap, lm_pulStaticShadowMap, lm_pixCanvasSizeU*lm_pixCanvasSizeV*4);
+#endif
 }
 
 
 // copy from static shadow map to dynamic layer
 __forceinline void CLayerMixer::FillShadowLayer( COLOR col)
 {
- #if (defined USE_PORTABLE_C)
-   DWORD* dst = (DWORD*)lm_pulShadowMap;
-   int n = lm_pixCanvasSizeU*lm_pixCanvasSizeV;   
-   DWORD color = __builtin_bswap32(col);
-   while(n--) {*(dst++)=color;}
- #elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   __asm {
     cld
     mov     ebx,D [this]
@@ -2006,7 +1942,7 @@ __forceinline void CLayerMixer::FillShadowLayer( COLOR col)
     rep     stosd
   }
 
- #elif (defined __GNU_INLINE_X86_32__)
+#elif (defined __GNU_INLINE_X86_32__)
   ULONG clob1, clob2, clob3;
   __asm__ __volatile__ (
     "cld                    \n\t"
@@ -2020,9 +1956,12 @@ __forceinline void CLayerMixer::FillShadowLayer( COLOR col)
         : "cc", "memory"
   );
 
- #else
-  #error Please write inline assembly for your platform.
- #endif
+#else
+   DWORD* dst = (DWORD*)lm_pulShadowMap;  // destination: the dynamic shadow layer
+   int n = lm_pixCanvasSizeU*lm_pixCanvasSizeV;  // number of 32-bit entries to fill
+   DWORD color = (col << 24) | ((col << 8) & 0x00FF0000) | ((col >> 8) & 0x0000FF00) | (col >> 24);  // portable byte swap; __builtin_bswap32 is GCC/Clang-only
+   while(n--) {*(dst++)=color;}
+#endif
 }
 
 
diff --git a/Sources/Engine/Math/Float.cpp b/Sources/Engine/Math/Float.cpp
index 7c9f0fc..9f04e16 100755
--- a/Sources/Engine/Math/Float.cpp
+++ b/Sources/Engine/Math/Float.cpp
@@ -24,18 +24,9 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 #define _PC_64    0x0300
 
 // !!! FIXME: I'd like to remove any dependency on the FPU control word from the game, asap.  --ryan.
-#ifdef USE_PORTABLE_C
-// Fake control87 for USE_PORTABLE_C version
-inline ULONG _control87(WORD newcw, WORD mask)
-{
-    static WORD fpw=_PC_64;
-    if (mask != 0)
-    {
-        fpw &= ~mask;
-        fpw |= (newcw & mask);
-    }
-    return(fpw);
-}
+#if (defined _MSC_VER)
+
+// _control87 is provided by the compiler
 
 #elif (defined __GNU_INLINE_X86_32__)
 
@@ -74,8 +65,20 @@ inline ULONG _control87(WORD newcw, WORD mask)
     return(fpw);
 }
 
-#elif (!defined _MSC_VER)
-#error Implement for your platform, or add a stub conditional here.
+#else
+
+// Fake _control87 for the portable C fallback: emulates the control word in a static, no hardware access
+inline ULONG _control87(WORD newcw, WORD mask)
+{
+    static WORD fpw=_PC_64;  // emulated control word, starts out as _PC_64
+    if (mask != 0)  // a zero mask only queries the current value
+    {
+        fpw &= ~mask;  // clear the bits selected by mask...
+        fpw |= (newcw & mask);  // ...and replace them with the matching bits of newcw
+    }
+    return(fpw);  // emulated control word after any update
+}
+
 #endif
 
 /* Get current precision setting of FPU. */
diff --git a/Sources/Engine/Math/Functions.h b/Sources/Engine/Math/Functions.h
index f0e8d03..1108e71 100755
--- a/Sources/Engine/Math/Functions.h
+++ b/Sources/Engine/Math/Functions.h
@@ -312,12 +312,7 @@ inline FLOAT NormByteToFloat( const ULONG ul)
 // fast float to int conversion
 inline SLONG FloatToInt( FLOAT f)
 {
-#if defined(__arm__) || defined(USE_PORTABLE_C)
-  // round to nearest by adding/subtracting 0.5 (depending on f pos/neg) before converting to SLONG
-  float addToRound = copysignf(0.5f, f); // copy f's signbit to 0.5 => if f<0 then addToRound = -0.5, else 0.5
-  return((SLONG) (f + addToRound));
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   SLONG slRet;
   __asm {
     fld    D [f]
@@ -336,16 +331,16 @@ inline SLONG FloatToInt( FLOAT f)
   );
   return(slRet);
 #else
-  #error Fill this in for your platform.
+  // round to nearest by adding/subtracting 0.5 (depending on f pos/neg) before converting to SLONG
+  float addToRound = copysignf(0.5f, f); // copy f's signbit to 0.5 => if f<0 then addToRound = -0.5, else 0.5
+  return((SLONG) (f + addToRound));
+
 #endif
 }
 
 // log base 2 of any float numero
 inline FLOAT Log2( FLOAT f) {
-#if (defined USE_PORTABLE_C) || defined(__arm__)
-  return log2f(f);
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   FLOAT fRet;
   _asm {
     fld1
@@ -368,7 +363,8 @@ inline FLOAT Log2( FLOAT f) {
   );
   return(fRet);
 #else
-  #error Fill this in for your platform.
+  return log2f(f);
+
 #endif
 }
 
@@ -376,25 +372,7 @@ inline FLOAT Log2( FLOAT f) {
 // returns accurate values only for integers that are power of 2
 inline SLONG FastLog2( SLONG x)
 {
-#if (defined USE_PORTABLE_C)
-#ifdef __GNUC__
-  if(x == 0) return 0; // __builtin_clz() is undefined for 0
-  int numLeadingZeros  = __builtin_clz(x);
-  return 31 - numLeadingZeros;
-#else
-  register SLONG val = x;
-  register SLONG retval = 31;
-  while (retval > 0)
-  {
-    if (val & (1 << retval))
-        return retval;
-    retval--;
-  }
-
-  return 0;
-#endif
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   SLONG slRet;
   __asm {
     bsr   eax,D [x]
@@ -411,8 +389,21 @@ inline SLONG FastLog2( SLONG x)
         : "memory"
   );
   return(slRet);
+#elif (defined __GNUC__)
+  if(x == 0) return 0; // __builtin_clz() is undefined for 0
+  int numLeadingZeros  = __builtin_clz(x);
+  return 31 - numLeadingZeros;
 #else
-  #error Fill this in for your platform.
+  register SLONG val = x;
+  register SLONG retval = 31;
+  while (retval > 0)
+  {
+    if (val & (1 << retval))
+        return retval;
+    retval--;
+  }
+
+  return 0;
 #endif
 }
 
@@ -420,11 +411,7 @@ inline SLONG FastLog2( SLONG x)
 // returns log2 of first larger value that is a power of 2
 inline SLONG FastMaxLog2( SLONG x)
 { 
-#if (defined USE_PORTABLE_C)
-printf("CHECK THIS: %s:%d\n", __FILE__, __LINE__);
-  return((SLONG) log2((double) x));
-
-#elif (defined __MSVC_INLINE__)
+#if (defined __MSVC_INLINE__)
   SLONG slRet;
   __asm {
     bsr   eax,D [x]
@@ -448,7 +435,9 @@ printf("CHECK THIS: %s:%d\n", __FILE__, __LINE__);
   );
   return(slRet);
 #else
-  #error Fill this in for your platform.
+printf("CHECK THIS: %s:%d\n", __FILE__, __LINE__);
+  return((SLONG) log2((double) x));
+
 #endif
 }
 */
diff --git a/Sources/Engine/Models/RenderModel_View.cpp b/Sources/Engine/Models/RenderModel_View.cpp
index 6c574fa..73fb5ef 100644
--- a/Sources/Engine/Models/RenderModel_View.cpp
+++ b/Sources/Engine/Models/RenderModel_View.cpp
@@ -40,14 +40,6 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 #define W  word ptr
 #define B  byte ptr
 
-#if (defined __MSVC_INLINE__)
-#define ASMOPT 1
-#elif (defined __GNU_INLINE_X86_32__)
-#define ASMOPT 0  // !!! FIXME: rcg10112001 Write GCC inline asm versions...
-#else
-#define ASMOPT 0
-#endif
-
 
 extern BOOL CVA_bModels;
 extern BOOL GFX_bTruform;
@@ -663,7 +655,7 @@ static FLOAT   _fHazeAdd;
 // check vertex against fog
 static void GetFogMapInVertex( GFXVertex3 &vtx, GFXTexCoord &tex)
 {
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
   __asm {
     mov     esi,D [vtx]
     mov     edi,D [tex]
@@ -708,7 +700,7 @@ static void GetFogMapInVertex( GFXVertex3 &vtx, GFXTexCoord &tex)
 // check vertex against haze
 static void GetHazeMapInVertex( GFXVertex3 &vtx, FLOAT &tx1)
 {
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
   __asm {
     mov     esi,D [vtx]
     mov     edi,D [tx1]
@@ -1080,7 +1072,7 @@ static void UnpackFrame( CRenderModel &rm, BOOL bKeepNormals)
     const ModelFrameVertex16 *pFrame1 = rm.rm_pFrame16_1;
     if( pFrame0==pFrame1)
     {
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
       // for each vertex in mip
       const SLONG fixLerpRatio = FloatToInt(fLerpRatio*256.0f); // fix 8:8
       SLONG slTmp1, slTmp2, slTmp3;
@@ -1196,7 +1188,7 @@ vtxNext16:
     // if lerping
     else
     {
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
       // for each vertex in mip
       const SLONG fixLerpRatio = FloatToInt(fLerpRatio*256.0f); // fix 8:8
       SLONG slTmp1, slTmp2, slTmp3;
@@ -1365,7 +1357,7 @@ vtxNext16L:
     // if no lerping
     if( pFrame0==pFrame1)
     {
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
       // for each vertex in mip
       const SLONG fixLerpRatio = FloatToInt(fLerpRatio*256.0f); // fix 8:8
       SLONG slTmp1, slTmp2, slTmp3;
@@ -1464,7 +1456,7 @@ vtxNext8:
     // if lerping
     else
     {
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
       const SLONG fixLerpRatio = FloatToInt(fLerpRatio*256.0f); // fix 8:8
       SLONG slTmp1, slTmp2, slTmp3;
       // re-adjust stretching factors because of fixint lerping (divide by 256)
@@ -1610,7 +1602,7 @@ vtxNext8L:
   }
 
   // generate colors from shades
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
   __asm {
     pxor    mm0,mm0
     // construct 64-bit RGBA light
@@ -1974,7 +1966,7 @@ void CModelObject::RenderModel_View( CRenderModel &rm)
     pvtxSrfBase = &_avtxSrfBase[iSrfVx0];
     INDEX iSrfVx;
 
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
     __asm {
       push    ebx
       mov     ebx,D [puwSrfToMip]
@@ -2074,7 +2066,7 @@ srfVtxLoop:
     const COLOR colD = AdjustColor( ms.ms_colDiffuse, _slTexHueShift, _slTexSaturation);
     colSrfDiff.MultiplyRGBA( colD, colMdlDiff);
 
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
     // setup texcoord array
     __asm {
       push    ebx
@@ -2134,7 +2126,7 @@ vtxEnd:
       for( INDEX iSrfVx=0; iSrfVx<ctSrfVx; iSrfVx++) pcolSrfBase[iSrfVx] = colSrfDiffAdj;
     }
     else {
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
       // setup color array
       const COLOR colS = colSrfDiff.ul.abgr;
       __asm {
@@ -2335,7 +2327,7 @@ diffColLoop:
     // cache rotation
     const FLOATmatrix3D &m = rm.rm_mObjectRotation;
 
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
     __asm {
       push    ebx
       mov     ebx,D [m]
@@ -2530,7 +2522,7 @@ reflMipLoop:
     // cache object view rotation
     const FLOATmatrix3D &m = rm.rm_mObjectToView;
 
-#if ASMOPT == 1
+#if (defined __MSVC_INLINE__)
     __asm {
       push    ebx
       mov     ebx,D [m]
diff --git a/Sources/Engine/Rendering/RendMisc.cpp b/Sources/Engine/Rendering/RendMisc.cpp
index 3d0e657..79a0a96 100644
--- a/Sources/Engine/Rendering/RendMisc.cpp
+++ b/Sources/Engine/Rendering/RendMisc.cpp
@@ -105,10 +105,7 @@ static SLONG slTmp;
 
 static inline PIX PIXCoord(FLOAT f) // (f+0.9999f) or (ceil(f))
 {
- #if (defined USE_PORTABLE_C)
-  return((PIX) (f+0.9999f));
-
- #elif (defined __MSVC_INLINE__)
+ #if (defined __MSVC_INLINE__)
   PIX pixRet;
   __asm {
     fld     dword ptr [f]
@@ -142,7 +139,8 @@ static inline PIX PIXCoord(FLOAT f) // (f+0.9999f) or (ceil(f))
   return pixRet;
 
  #else
-  #error Please write inline ASM for your platform.
+  return((PIX) (f+0.9999f));
+
  #endif
 }
 
diff --git a/Sources/Engine/Sound/SoundMixer.cpp b/Sources/Engine/Sound/SoundMixer.cpp
index 82a47fa..adc16a0 100644
--- a/Sources/Engine/Sound/SoundMixer.cpp
+++ b/Sources/Engine/Sound/SoundMixer.cpp
@@ -43,17 +43,15 @@ static CSoundData *psd;
 
 // nasm on MacOS X is getting wrong addresses of external globals, so I have
 //  to define them in the .asm file...lame.
-#ifdef __GNU_INLINE_X86_32__
-#ifdef USE_PORTABLE_C
-#define INASM 
-#else
+#if (defined __GNU_INLINE_X86_32__)
 #define INASM extern
-#endif
-#else
+#elif (defined __MSVC_INLINE__)
 #define INASM static
 static __int64 mmInvFactor   = 0x00007FFF00007FFF;
 static FLOAT f65536 = 65536.0f;
 static FLOAT f4G    = 4294967296.0f;
+#else
+#define INASM static
 #endif
 
 INASM SLONG slMixerBufferSize;        // size in samples per channel of the destination buffers
@@ -81,11 +79,7 @@ void ResetMixer( const SLONG *pslBuffer, const SLONG slBufferSize)
   slMixerBufferSampleRate = _pSound->sl_SwfeFormat.nSamplesPerSec;
 
   // wipe destination mixer buffer
-  // (Mac OS X uses this path because Apple's memset() is customized for each CPU they support and way faster than this inline asm. --ryan.)
-  #if ((defined USE_PORTABLE_C) || (PLATFORM_MACOSX))
-  memset(pvMixerBuffer, 0, slMixerBufferSize * 8);
-
-  #elif (defined __MSVC_INLINE__)
+  #if (defined __MSVC_INLINE__)
   __asm {
     cld
     xor     eax,eax
@@ -94,19 +88,8 @@ void ResetMixer( const SLONG *pslBuffer, const SLONG slBufferSize)
     shl     ecx,1 // *2 because of 32-bit src format
     rep     stosd
   }
-  #elif (defined __GNU_INLINE_X86_32__)
-  // !!! FIXME : rcg12172001 Is this REALLY any faster than memset()?
-  ULONG clob1, clob2;
-  __asm__ __volatile__ (
-    "cld                  \n\t"
-    "rep                  \n\t"
-    "stosl                \n\t"
-        : "=D" (clob1), "=c" (clob2)
-        : "a" (0), "D" (pvMixerBuffer), "c" (slMixerBufferSize*2)
-        : "cc", "memory"
-  );
   #else
-    #error please write inline asm for your platform.
+  memset(pvMixerBuffer, 0, slMixerBufferSize * 8);
   #endif
 }
 
@@ -118,10 +101,7 @@ void CopyMixerBuffer_stereo( const SLONG slSrcOffset, void *pDstBuffer, const SL
   ASSERT( slBytes%4==0);
   if( slBytes<4) return;
 
-  #if ((defined USE_PORTABLE_C) || (PLATFORM_MACOSX))
-  // (Mac OS X uses this path because Apple's memset() is customized for each CPU they support and way faster than this inline asm. --ryan.)
-  memcpy(pDstBuffer, ((const char *)pvMixerBuffer) + slSrcOffset, slBytes);
-  #elif (defined __MSVC_INLINE__)
+  #if (defined __MSVC_INLINE__)
   __asm {
     cld
     mov     esi,D [slSrcOffset]
@@ -131,21 +111,8 @@ void CopyMixerBuffer_stereo( const SLONG slSrcOffset, void *pDstBuffer, const SL
     shr     ecx,2   // bytes to samples per channel
     rep     movsd
   }
-  #elif (defined __GNU_INLINE_X86_32__)
-  // !!! FIXME : rcg12172001 Is this REALLY any faster than memcpy()?
-  ULONG clob1, clob2, clob3;
-  __asm__ __volatile__ (
-    "cld                 \n\t"
-    "rep                 \n\t"
-    "movsl               \n\t"
-      : "=S" (clob1), "=D" (clob2), "=c" (clob3)
-      : "S" (((char *)pvMixerBuffer) + slSrcOffset),
-        "D" (pDstBuffer),
-        "c" (slBytes >> 2)
-      : "cc", "memory"
-  );
   #else
-  #error please write inline asm for your platform.
+  memcpy(pDstBuffer, ((const char *)pvMixerBuffer) + slSrcOffset, slBytes);
   #endif
 }
 
@@ -157,18 +124,7 @@ void CopyMixerBuffer_mono( const SLONG slSrcOffset, void *pDstBuffer, const SLON
   ASSERT( slBytes%2==0);
   if( slBytes<4) return;
 
-  #if (defined USE_PORTABLE_C)
-  // (This is untested, currently. --ryan.)
-  WORD *dest = (WORD *) pDstBuffer;
-  WORD *src = (WORD *) ( ((char *) pvMixerBuffer) + slSrcOffset );
-  SLONG max = slBytes / 4;
-  for (SLONG i = 0; i < max; i++) {
-      *dest = *src;
-      dest++;    // move 16 bits.
-      src+=2;    // move 32 bits.
-  }
-
-  #elif (defined __MSVC_INLINE__)
+  #if (defined __MSVC_INLINE__)
   __asm {
     mov     esi,D [slSrcOffset]
     add     esi,D [pvMixerBuffer]
@@ -204,7 +160,15 @@ copyLoop:
   );
 
   #else
-  #error please write inline asm for your platform.
+  // (This is untested, currently. --ryan.)
+  WORD *dest = (WORD *) pDstBuffer;
+  WORD *src = (WORD *) ( ((char *) pvMixerBuffer) + slSrcOffset );
+  SLONG max = slBytes / 4;
+  for (SLONG i = 0; i < max; i++) {
+      *dest = *src;
+      dest++;    // move 16 bits.
+      src+=2;    // move 32 bits.
+  }
   #endif
 }
 
@@ -215,24 +179,7 @@ static void ConvertMixerBuffer( const SLONG slBytes)
   ASSERT( slBytes%4==0);
   if( slBytes<4) return;
 
-  #if (defined USE_PORTABLE_C)
-  //STUBBED("ConvertMixerBuffer");
-  SWORD *dest = (SWORD *) pvMixerBuffer;
-  SLONG *src = (SLONG *) pvMixerBuffer;
-  SLONG max = slBytes / 2;
-  int tmp;
-  for (SLONG i = 0; i < max; i++) {
-      tmp = *src;
-      if (tmp>32767) tmp=32767;
-      if (tmp<-32767) tmp=-32767;
-      *dest=tmp;
-      dest++;    // move 16 bits.
-      src++;     // move 32 bits.
-  }
-
-
-
-  #elif (defined __MSVC_INLINE__)
+  #if (defined __MSVC_INLINE__)
   __asm {
     cld
     mov     esi,D [pvMixerBuffer]
@@ -271,7 +218,20 @@ copyLoop:
   );
 
   #else
-  #error please write inline asm for your platform.
+
+  SWORD *dest = (SWORD *) pvMixerBuffer;
+  SLONG *src = (SLONG *) pvMixerBuffer;
+  SLONG max = slBytes / 2;
+  int tmp;
+  for (SLONG i = 0; i < max; i++) {
+      tmp = *src;
+      if (tmp>32767) tmp=32767;
+      if (tmp<-32767) tmp=-32767;
+      *dest=tmp;
+      dest++;    // move 16 bits.
+      src++;     // move 32 bits.
+  }
+
   #endif
 }
 
@@ -337,85 +297,7 @@ inline void MixMono( CSoundObject *pso)
 {
   _pfSoundProfile.StartTimer(CSoundProfile::PTI_RAWMIXER);
 
- #if (defined USE_PORTABLE_C)
-  // initialize some local vars
-  SLONG slLeftSample, slRightSample, slNextSample;
-  SLONG *pslDstBuffer = (SLONG*)pvMixerBuffer;
-  fixLeftOfs   = (__int64)(fLeftOfs   * 65536.0);
-  fixRightOfs  = (__int64)(fRightOfs  * 65536.0);
-  __int64 fixLeftStep  = (__int64)(fLeftStep  * 65536.0);
-  __int64 fixRightStep = (__int64)(fRightStep * 65536.0);
-  __int64 fixSoundBufferSize = ((__int64)slSoundBufferSize)<<16;
-  mmSurroundFactor = (__int64)(SWORD)mmSurroundFactor;
-
-  SLONG slLeftVolume_ = slLeftVolume >> 16;
-  SLONG slRightVolume_ = slRightVolume >> 16;
-
-  // loop thru source buffer
-  INDEX iCt = slMixerBufferSize;
-  FOREVER
-  {
-    // if left channel source sample came to end of sample buffer
-    if( fixLeftOfs >= fixSoundBufferSize) {
-      fixLeftOfs -= fixSoundBufferSize;
-      // if has no loop, end it
-      bEndOfSound = bNotLoop;
-    }
-    // if right channel source sample came to end of sample buffer
-    if( fixRightOfs >= fixSoundBufferSize) {
-      fixRightOfs -= fixSoundBufferSize;
-      // if has no loop, end it
-      bEndOfSound = bNotLoop;
-    }
-    // end of buffer?
-    if( iCt<=0 || bEndOfSound) break;
-
-    // fetch one lineary interpolated sample on left channel
-    slLeftSample = pswSrcBuffer[(fixLeftOfs>>16)+0];
-    slNextSample = pswSrcBuffer[(fixLeftOfs>>16)+1];
-    slLeftSample = (slLeftSample*(65535-(fixLeftOfs&65535)) + slNextSample*(fixLeftOfs&65535)) >>16;
-    // fetch one lineary interpolated sample on right channel
-    slRightSample = pswSrcBuffer[(fixRightOfs>>16)+0];
-    slNextSample  = pswSrcBuffer[(fixRightOfs>>16)+1];
-    slRightSample = (slRightSample*(65535-(fixRightOfs&65535)) + slNextSample*(fixRightOfs&65535)) >>16;
-
-    // filter samples
-    slLastLeftSample  += ((slLeftSample -slLastLeftSample) *slLeftFilter) >>15;
-    slLastRightSample += ((slRightSample-slLastRightSample)*slRightFilter)>>15;
-
-    // apply stereo volume to current sample
-    slLeftSample  = (slLastLeftSample  * slLeftVolume_) >>15;
-    slRightSample = (slLastRightSample * slRightVolume_)>>15;
-
-    slLeftSample  ^= (SLONG)((mmSurroundFactor>> 0)&0xFFFFFFFF);
-    slRightSample ^= (SLONG)((mmSurroundFactor>>32)&0xFFFFFFFF);
-
-    // mix in current sample
-    slLeftSample  += pslDstBuffer[0];
-    slRightSample += pslDstBuffer[1];
-    // upper clamp
-    if( slLeftSample  > MAX_SWORD) slLeftSample  = MAX_SWORD;
-    if( slRightSample > MAX_SWORD) slRightSample = MAX_SWORD;
-    // lower clamp
-    if( slLeftSample  < MIN_SWORD) slLeftSample  = MIN_SWORD;
-    if( slRightSample < MIN_SWORD) slRightSample = MIN_SWORD;
-
-    // store samples (both channels)
-    pslDstBuffer[0] = slLeftSample;
-    pslDstBuffer[1] = slRightSample;
-
-    // modify volume  `
-    slLeftVolume  += (SWORD)((mmVolumeGain>> 0)&0xFFFF);
-    slRightVolume += (SWORD)((mmVolumeGain>>16)&0xFFFF);
-
-    // advance to next sample
-    fixLeftOfs   += fixLeftStep;
-    fixRightOfs  += fixRightStep;
-    pslDstBuffer += 2;
-    iCt--;
-  }
-
- #elif (defined __MSVC_INLINE__)
+ #if (defined __MSVC_INLINE__)
   __asm {
     // convert from floats to fixints 32:16
     fld     D [fLeftOfs]
@@ -553,19 +435,6 @@ loopEnd:
    MixMono_asm(pso);
 
  #else
-   #error please write inline asm for your platform.
- #endif
-
-  _pfSoundProfile.StopTimer(CSoundProfile::PTI_RAWMIXER);
-}
-
-
-// mixes one stereo 16-bit signed sound to destination buffer
-inline void MixStereo( CSoundObject *pso)
-{
-  _pfSoundProfile.StartTimer(CSoundProfile::PTI_RAWMIXER);
-
- #if (defined USE_PORTABLE_C)
   // initialize some local vars
   SLONG slLeftSample, slRightSample, slNextSample;
   SLONG *pslDstBuffer = (SLONG*)pvMixerBuffer;
@@ -599,12 +468,12 @@ inline void MixStereo( CSoundObject *pso)
     if( iCt<=0 || bEndOfSound) break;
 
     // fetch one lineary interpolated sample on left channel
-    slLeftSample = pswSrcBuffer[(fixLeftOfs>>15)+0];
-    slNextSample = pswSrcBuffer[(fixLeftOfs>>15)+2];
+    slLeftSample = pswSrcBuffer[(fixLeftOfs>>16)+0];
+    slNextSample = pswSrcBuffer[(fixLeftOfs>>16)+1];
     slLeftSample = (slLeftSample*(65535-(fixLeftOfs&65535)) + slNextSample*(fixLeftOfs&65535)) >>16;
     // fetch one lineary interpolated sample on right channel
-    slRightSample = pswSrcBuffer[(fixRightOfs>>15)+0];
-    slNextSample  = pswSrcBuffer[(fixRightOfs>>15)+2];
+    slRightSample = pswSrcBuffer[(fixRightOfs>>16)+0];
+    slNextSample  = pswSrcBuffer[(fixRightOfs>>16)+1];
     slRightSample = (slRightSample*(65535-(fixRightOfs&65535)) + slNextSample*(fixRightOfs&65535)) >>16;
 
     // filter samples
@@ -643,7 +512,18 @@ inline void MixStereo( CSoundObject *pso)
     iCt--;
   }
 
- #elif (defined __MSVC_INLINE__)
+ #endif
+
+  _pfSoundProfile.StopTimer(CSoundProfile::PTI_RAWMIXER);
+}
+
+
+// mixes one stereo 16-bit signed sound to destination buffer
+inline void MixStereo( CSoundObject *pso)
+{
+  _pfSoundProfile.StartTimer(CSoundProfile::PTI_RAWMIXER);
+
+ #if (defined __MSVC_INLINE__)
   __asm {
     // convert from floats to fixints 32:16
     fld     D [fLeftOfs]
@@ -783,7 +663,83 @@ loopEnd:
    MixStereo_asm(pso);
 
  #else
-   #error please write inline asm for your platform.
+  // initialize some local vars
+  SLONG slLeftSample, slRightSample, slNextSample;
+  SLONG *pslDstBuffer = (SLONG*)pvMixerBuffer;
+  fixLeftOfs   = (__int64)(fLeftOfs   * 65536.0);
+  fixRightOfs  = (__int64)(fRightOfs  * 65536.0);
+  __int64 fixLeftStep  = (__int64)(fLeftStep  * 65536.0);
+  __int64 fixRightStep = (__int64)(fRightStep * 65536.0);
+  __int64 fixSoundBufferSize = ((__int64)slSoundBufferSize)<<16;
+  mmSurroundFactor = (__int64)(SWORD)mmSurroundFactor;
+
+  SLONG slLeftVolume_ = slLeftVolume >> 16;
+  SLONG slRightVolume_ = slRightVolume >> 16;
+
+  // loop thru source buffer
+  INDEX iCt = slMixerBufferSize;
+  FOREVER
+  {
+    // if left channel source sample came to end of sample buffer
+    if( fixLeftOfs >= fixSoundBufferSize) {
+      fixLeftOfs -= fixSoundBufferSize;
+      // if has no loop, end it
+      bEndOfSound = bNotLoop;
+    }
+    // if right channel source sample came to end of sample buffer
+    if( fixRightOfs >= fixSoundBufferSize) {
+      fixRightOfs -= fixSoundBufferSize;
+      // if has no loop, end it
+      bEndOfSound = bNotLoop;
+    }
+    // end of buffer?
+    if( iCt<=0 || bEndOfSound) break;
+
+    // fetch one linearly interpolated sample on left channel
+    slLeftSample = pswSrcBuffer[(fixLeftOfs>>15)+0];
+    slNextSample = pswSrcBuffer[(fixLeftOfs>>15)+2];
+    slLeftSample = (slLeftSample*(65535-(fixLeftOfs&65535)) + slNextSample*(fixLeftOfs&65535)) >>16;
+    // fetch one linearly interpolated sample on right channel
+    slRightSample = pswSrcBuffer[(fixRightOfs>>15)+0];
+    slNextSample  = pswSrcBuffer[(fixRightOfs>>15)+2];
+    slRightSample = (slRightSample*(65535-(fixRightOfs&65535)) + slNextSample*(fixRightOfs&65535)) >>16;
+
+    // filter samples
+    slLastLeftSample  += ((slLeftSample -slLastLeftSample) *slLeftFilter) >>15;
+    slLastRightSample += ((slRightSample-slLastRightSample)*slRightFilter)>>15;
+
+    // apply stereo volume to current sample
+    slLeftSample  = (slLastLeftSample  * slLeftVolume_) >>15;
+    slRightSample = (slLastRightSample * slRightVolume_)>>15;
+
+    slLeftSample  ^= (SLONG)((mmSurroundFactor>> 0)&0xFFFFFFFF);
+    slRightSample ^= (SLONG)((mmSurroundFactor>>32)&0xFFFFFFFF);
+
+    // mix in current sample
+    slLeftSample  += pslDstBuffer[0];
+    slRightSample += pslDstBuffer[1];
+    // upper clamp
+    if( slLeftSample  > MAX_SWORD) slLeftSample  = MAX_SWORD;
+    if( slRightSample > MAX_SWORD) slRightSample = MAX_SWORD;
+    // lower clamp
+    if( slLeftSample  < MIN_SWORD) slLeftSample  = MIN_SWORD;
+    if( slRightSample < MIN_SWORD) slRightSample = MIN_SWORD;
+
+    // store samples (both channels)
+    pslDstBuffer[0] = slLeftSample;
+    pslDstBuffer[1] = slRightSample;
+
+    // modify volume
+    slLeftVolume  += (SWORD)((mmVolumeGain>> 0)&0xFFFF);
+    slRightVolume += (SWORD)((mmVolumeGain>>16)&0xFFFF);
+
+    // advance to next sample
+    fixLeftOfs   += fixLeftStep;
+    fixRightOfs  += fixRightStep;
+    pslDstBuffer += 2;
+    iCt--;
+  }
+
  #endif
 
   _pfSoundProfile.StopTimer(CSoundProfile::PTI_RAWMIXER);