From 52c178718ca888437eee900a5653ddf286a25f14 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 22 Apr 2016 02:25:24 +0300 Subject: [PATCH] fix SoundMixer asm SoundMixer386.asm forgot to save some callee-save registers too --- Sources/Engine/Sound/SoundMixer.cpp | 24 ++++++++++++++++-------- Sources/Engine/Sound/SoundMixer386.asm | 8 ++++++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Sources/Engine/Sound/SoundMixer.cpp b/Sources/Engine/Sound/SoundMixer.cpp index 6a9175a..e24ace2 100644 --- a/Sources/Engine/Sound/SoundMixer.cpp +++ b/Sources/Engine/Sound/SoundMixer.cpp @@ -96,11 +96,12 @@ void ResetMixer( const SLONG *pslBuffer, const SLONG slBufferSize) } #elif (defined __GNU_INLINE__) // !!! FIXME : rcg12172001 Is this REALLY any faster than memset()? + ULONG clob1, clob2; __asm__ __volatile__ ( "cld \n\t" "rep \n\t" "stosl \n\t" - : // no outputs. + : "=D" (clob1), "=c" (clob2) : "a" (0), "D" (pvMixerBuffer), "c" (slMixerBufferSize*2) : "cc", "memory" ); @@ -132,11 +133,12 @@ void CopyMixerBuffer_stereo( const SLONG slSrcOffset, void *pDstBuffer, const SL } #elif (defined __GNU_INLINE__) // !!! FIXME : rcg12172001 Is this REALLY any faster than memcpy()? + ULONG clob1, clob2, clob3; __asm__ __volatile__ ( "cld \n\t" "rep \n\t" "movsl \n\t" - : // no outputs. + : "=S" (clob1), "=D" (clob2), "=c" (clob3) : "S" (((char *)pvMixerBuffer) + slSrcOffset), "D" (pDstBuffer), "c" (slBytes >> 2) @@ -184,6 +186,9 @@ copyLoop: #elif (defined __GNU_INLINE__) __asm__ __volatile__ ( + "movl %[pvMixerBuffer], %%esi \n\t" + "movl %[pDstBuffer], %%edi \n\t" + "movl %[slDW], %%ecx \n\t" "0: \n\t" // copyLoop "movzwl (%%esi), %%eax \n\t" "movw %%ax, (%%edi) \n\t" @@ -192,10 +197,10 @@ copyLoop: "decl %%ecx \n\t" "jnz 0b \n\t" // copyLoop : // no outputs. - : "S" (((char *)pvMixerBuffer) + slSrcOffset), - "D" (pDstBuffer), - "c" (slBytes >> 2) - : "cc", "memory", "eax" + : [pvMixerBuffer] "g" (((char *)pvMixerBuffer) + slSrcOffset), + [pDstBuffer] "g" (pDstBuffer), + [slDW] "g" (slBytes >> 2) + : "eax", "ecx", "esi", "edi", "cc", "memory" ); #else @@ -247,6 +252,9 @@ copyLoop: #elif (defined __GNU_INLINE__) __asm__ __volatile__ ( + "movl %[pvMixerBuffer], %%esi \n\t" + "movl %[pvMixerBuffer], %%edi \n\t" + "movl %[slDW], %%ecx \n\t" "cld \n\t" "0: \n\t" // copyLoop "movq (%%esi), %%mm0 \n\t" @@ -258,8 +266,8 @@ copyLoop: "jnz 0b \n\t" // copyLoop "emms \n\t" : // no outputs. - : "S" (pvMixerBuffer), "D" (pvMixerBuffer), "c" (slBytes >> 2) - : "cc", "memory" + : [pvMixerBuffer] "g" (pvMixerBuffer), [slDW] "g" (slBytes >> 2) + : FPU_REGS, "mm0", "ecx", "esi", "edi", "cc", "memory" ); #else diff --git a/Sources/Engine/Sound/SoundMixer386.asm b/Sources/Engine/Sound/SoundMixer386.asm index 9231c0c..bedc390 100644 --- a/Sources/Engine/Sound/SoundMixer386.asm +++ b/Sources/Engine/Sound/SoundMixer386.asm @@ -96,6 +96,8 @@ SEGMENT .text global MixMono_asm MixMono_asm: push ebx ; Save GCC register. + push esi + push edi ; convert from floats to fixints 32:16 fld D [fLeftOfs] fmul D [f65536] @@ -224,6 +226,8 @@ loopEnd_MixMono: shr edx,16 mov D [slLastLeftSample],eax mov D [slLastRightSample],edx + pop edi + pop esi pop ebx ; Restore GCC register. emms ret @@ -232,6 +236,8 @@ loopEnd_MixMono: global MixStereo_asm MixStereo_asm: push ebx ; Save GCC register. + push esi + push edi ; convert from floats to fixints 32:16 fld D [fLeftOfs] fmul D [f65536] @@ -363,6 +369,8 @@ loopEnd_MixStereo: mov D [slLastLeftSample],eax mov D [slLastRightSample],edx emms + pop edi + pop esi pop ebx ; Restore GCC register. ret