Serious-Engine/Sources/Engine/Sound/SoundMixer.cpp
Ryan C. Gordon 9820436fbe First pass at cleaning out 64-bit issues.
Touches a lot of code to remove long constants like "1L", so this patch is
large and ugly, but I think it makes all those Clamp() calls look nicer in
the long run.

Most of the game is 64-bit clean, since we can build without assembly code
now. I've marked the things that are obviously still wrong with STUBBED lines.

That being said: a 64-bit build can already run the demos mostly correctly,
so we're actually almost there!

There are a few obvious things that are obviously wrong, to be fixed.
2016-04-06 23:20:29 -04:00

1041 lines
35 KiB
C++

/* Copyright (c) 2002-2012 Croteam Ltd.
This program is free software; you can redistribute it and/or modify
it under the terms of version 2 of the GNU General Public License as published by
the Free Software Foundation
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
#include "Engine/StdH.h"
#include <string.h>
#include <Engine/Sound/SoundProfile.h>
#include <Engine/Sound/SoundDecoder.h>
#include <Engine/Sound/SoundLibrary.h>
#include <Engine/Sound/SoundData.h>
#include <Engine/Sound/SoundObject.h>
#include <Engine/Base/Statistics_Internal.h>
#include <Engine/Base/Console.h>
// asm shortcuts
// (operand-size keywords used by the MSVC inline assembly blocks in this file)
#define O offset
#define Q qword ptr
#define D dword ptr
#define W word ptr
#define B byte ptr
// console variables for volume
// (both master volumes are clamped to 0..1 by ResetMixer(); MixSound() scales
// music sounds by snd_fMusicVolume and all other sounds by snd_fSoundVolume)
extern FLOAT snd_fSoundVolume;
extern FLOAT snd_fMusicVolume;
// when non-zero, MixSound() averages the left/right mixing parameters before mixing
extern INDEX snd_bMono;
// a bunch of local vars coming up
static SLONG slMixerBufferSampleRate; // quality of destination buffer (samples per second, set by ResetMixer())
static CSoundData *psd; // sound data of the sound object currently processed by MixSound()
// nasm on MacOS X is getting wrong addresses of external globals, so I have
// to define them in the .asm file...lame.
// INASM selects the linkage of the variables shared with the mixing routines:
//  - GNU inline build with USE_PORTABLE_C: plain definitions in this file
//  - GNU inline build without USE_PORTABLE_C: 'extern' (defined by the external asm file)
//  - any other build: 'static' in this file
#ifdef __GNU_INLINE__
#ifdef USE_PORTABLE_C
#define INASM
#else
#define INASM extern
#endif
#else
#define INASM static
// constants referenced by the MSVC inline assembly mixers:
// XOR mask applied to the packed interpolation fractions (see the pxor in the mixers)
static __int64 mmInvFactor = 0x00007FFF00007FFF;
// float -> fixed-point scale factors (2^16 and 2^32)
static FLOAT f65536 = 65536.0f;
static FLOAT f4G = 4294967296.0f;
#endif
INASM SLONG slMixerBufferSize; // size in samples per channel of the destination buffers
INASM void *pvMixerBuffer; // pointer to the start of the destination buffers
INASM SWORD *pswSrcBuffer; // 16-bit source samples of the sound being mixed
INASM SLONG slLeftVolume, slRightVolume, slLeftFilter, slRightFilter;
INASM SLONG slLastLeftSample, slLastRightSample, slSoundBufferSize;
INASM FLOAT fSoundSampleRate, fPhase;
INASM FLOAT fOfsDelta, fStep, fLeftStep, fRightStep, fLeftOfs, fRightOfs;
// fixed-point source offsets; the portable C mixers and MixSound() treat them
// as having 16 fractional bits (scale factor 65536)
INASM __int64 fixLeftOfs, fixRightOfs;
INASM __int64 mmSurroundFactor, mmLeftStep, mmRightStep, mmVolumeGain;
INASM BOOL bNotLoop, bEndOfSound;
// reset mixer buffer (wipes it with zeroes and remembers pointers in static mixer variables)
//
//  pslBuffer    - start of the mixing buffer; it is written to despite the const
//                 qualifier and must hold 8 bytes (two 32-bit values) per sample frame
//  slBufferSize - size in bytes of the 16-bit stereo destination format
//                 (must be a multiple of 4)
//
// Also clamps the master volume console variables to the 0..1 range and caches
// the output sample rate for MixSound().
void ResetMixer( const SLONG *pslBuffer, const SLONG slBufferSize)
{
  // clamp master volumes
  snd_fSoundVolume = Clamp(snd_fSoundVolume, 0.0f, 1.0f);
  snd_fMusicVolume = Clamp(snd_fMusicVolume, 0.0f, 1.0f);

  // cache local variables
  ASSERT( slBufferSize%4==0);
  pvMixerBuffer = (void*)pslBuffer;
  slMixerBufferSize = slBufferSize /2/2; // because it's stereo and 16-bit dst format
  slMixerBufferSampleRate = _pSound->sl_SwfeFormat.nSamplesPerSec;

  // wipe destination mixer buffer (2 channels * 32 bits per sample frame).
  // memset() is used on every platform: it does the same job as the former
  // 'rep stos' inline assembly, and the GNU variant of that assembly modified
  // registers that were declared as input-only operands.
  if( slMixerBufferSize>0) {
    memset( pvMixerBuffer, 0, (size_t)slMixerBufferSize * 8);
  }
}
// copy mixer buffer to the output buffer(s)
void CopyMixerBuffer_stereo( const SLONG slSrcOffset, void *pDstBuffer, const SLONG slBytes)
{
ASSERT( pDstBuffer!=NULL);
ASSERT( slBytes%4==0);
if( slBytes<4) return;
#if ((defined USE_PORTABLE_C) || (PLATFORM_MACOSX))
// (Mac OS X uses this path because Apple's memset() is customized for each CPU they support and way faster than this inline asm. --ryan.)
memcpy(pDstBuffer, ((const char *)pvMixerBuffer) + slSrcOffset, slBytes);
#elif (defined __MSVC_INLINE__)
__asm {
cld
mov esi,D [slSrcOffset]
add esi,D [pvMixerBuffer]
mov edi,D [pDstBuffer]
mov ecx,D [slBytes]
shr ecx,2 // bytes to samples per channel
rep movsd
}
#elif (defined __GNU_INLINE__)
// !!! FIXME : rcg12172001 Is this REALLY any faster than memcpy()?
__asm__ __volatile__ (
"cld \n\t"
"rep \n\t"
"movsl \n\t"
: // no outputs.
: "S" (((char *)pvMixerBuffer) + slSrcOffset),
"D" (pDstBuffer),
"c" (slBytes >> 2)
: "cc", "memory"
);
#else
#error please write inline asm for your platform.
#endif
}
// copy one channel from mixer buffer to the output buffer(s)
void CopyMixerBuffer_mono( const SLONG slSrcOffset, void *pDstBuffer, const SLONG slBytes)
{
ASSERT( pDstBuffer!=NULL);
ASSERT( slBytes%2==0);
if( slBytes<4) return;
#if (defined USE_PORTABLE_C)
// (This is untested, currently. --ryan.)
WORD *dest = (WORD *) pDstBuffer;
WORD *src = (WORD *) ( ((char *) pvMixerBuffer) + slSrcOffset );
SLONG max = slBytes / 4;
for (SLONG i = 0; i < max; i++) {
*dest = *src;
dest++; // move 16 bits.
src+=2; // move 32 bits.
}
#elif (defined __MSVC_INLINE__)
__asm {
mov esi,D [slSrcOffset]
add esi,D [pvMixerBuffer]
mov edi,D [pDstBuffer]
mov ecx,D [slBytes]
shr ecx,2 // bytes to samples
copyLoop:
movzx eax,W [esi]
mov W [edi],ax
add esi,4
add edi,2
dec ecx
jnz copyLoop
}
#elif (defined __GNU_INLINE__)
__asm__ __volatile__ (
"0: \n\t" // copyLoop
"movzwl (%%esi), %%eax \n\t"
"movw %%ax, (%%edi) \n\t"
"addl $4, %%esi \n\t"
"addl $2, %%edi \n\t"
"decl %%ecx \n\t"
"jnz 0b \n\t" // copyLoop
: // no outputs.
: "S" (((char *)pvMixerBuffer) + slSrcOffset),
"D" (pDstBuffer),
"c" (slBytes >> 2)
: "cc", "memory", "eax"
);
#else
#error please write inline asm for your platform.
#endif
}
// plain conversion of mixer buffer from 32-bit to 16-bit clamped
//
// Converts slBytes/2 values in place: 32-bit values are read from the start of
// pvMixerBuffer and the clamped 16-bit results are written back to the start
// of the same buffer (the write position never passes the read position).
//  slBytes - size in bytes of the resulting 16-bit data (must be a multiple
//            of 4); nothing is done if it is less than 4
static void ConvertMixerBuffer( const SLONG slBytes)
{
ASSERT( slBytes%4==0);
if( slBytes<4) return;
#if (defined USE_PORTABLE_C)
//STUBBED("ConvertMixerBuffer");
SWORD *dest = (SWORD *) pvMixerBuffer;
SLONG *src = (SLONG *) pvMixerBuffer;
SLONG max = slBytes / 2;
int tmp;
for (SLONG i = 0; i < max; i++) {
tmp = *src;
// NOTE(review): this path clamps to -32767..+32767, while the packssdw
// instruction used by the assembly paths saturates to -32768..+32767
if (tmp>32767) tmp=32767;
if (tmp<-32767) tmp=-32767;
*dest=tmp;
dest++; // move 16 bits.
src++; // move 32 bits.
}
#elif (defined __MSVC_INLINE__)
__asm {
cld
mov esi,D [pvMixerBuffer]
mov edi,D [pvMixerBuffer]
mov ecx,D [slBytes]
shr ecx,2 // bytes to samples (2 channels)
copyLoop:
// each iteration packs two 32-bit values into two saturated 16-bit values
movq mm0,Q [esi]
packssdw mm0,mm0
movd D [edi],mm0
add esi,8
add edi,4
dec ecx
jnz copyLoop
emms
}
#elif (defined __GNU_INLINE__)
// NOTE(review): this block names 32-bit registers explicitly and changes
// esi/edi/ecx/mm0 although they are only listed as inputs (mm0 is not listed
// at all) - confirm the operand constraints against the GCC documentation
__asm__ __volatile__ (
"cld \n\t"
"0: \n\t" // copyLoop
"movq (%%esi), %%mm0 \n\t"
"packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, (%%edi) \n\t"
"addl $8, %%esi \n\t"
"addl $4, %%edi \n\t"
"decl %%ecx \n\t"
"jnz 0b \n\t" // copyLoop
"emms \n\t"
: // no outputs.
: "S" (pvMixerBuffer), "D" (pvMixerBuffer), "c" (slBytes >> 2)
: "cc", "memory"
);
#else
#error please write inline asm for your platform.
#endif
}
// normalize mixer buffer
//
// Converts the 32-bit mixer buffer to 16-bit in place, attenuating it when the
// mixed signal would not fit into 16 bits.
//  fNormStrength  - normalization strength, 0..1; below 0.01 the buffer is only
//                   converted (clamped) without any normalization
//  slBytes        - size in bytes of the resulting 16-bit data (must be a
//                   multiple of 4); nothing is done if it is less than 8
//  fLastNormValue - in: gain reached at the end of the previous call;
//                   out: gain reached at the end of this call
//
// The gain is interpolated from fLastNormValue towards the new target over the
// first quarter of the samples, so it never jumps between two buffers.
void NormalizeMixerBuffer( const FLOAT fNormStrength, const SLONG slBytes, FLOAT &fLastNormValue)
{
  // just convert to 16-bit if normalization isn't required
  ASSERT( slBytes%4==0);
  if( slBytes<8) return;
  if( fNormStrength<0.01f) {
    ConvertMixerBuffer(slBytes);
    return;
  }

  // we might need to normalize a bit, so first - find maximum
  INDEX i;
  SLONG slPeak = 0;
  SLONG *pslSrc = (SLONG*)pvMixerBuffer;
  const INDEX iSamples = slBytes/2; // 16-bit was assumed -> samples (treat as mono)
  for( i=0; i<iSamples; i++) slPeak = Max( Abs(pslSrc[i]), slPeak);

  // determine normalize value (do not increase volume!)
  // The target gain is capped at 1: a silent buffer (peak of 0) would otherwise
  // divide by zero and yield an infinite gain (and NaN samples), and a quiet
  // buffer following a loud one would be amplified far above its real level.
  FLOAT fNormValue = 1.0f;
  if( slPeak>32767) fNormValue = 32767.0f / (FLOAT)slPeak;

  // skip normalization if neither this nor the previous buffer needs attenuation
  if( fNormValue>0.99f && fLastNormValue>0.99f) { // should be enough to tolerate
    fLastNormValue = 1.0f;
    ConvertMixerBuffer(slBytes);
    return;
  }

  // adjust normalize value by strength
  ASSERT( fNormStrength>=0 && fNormStrength<=1);
  fNormValue = Lerp( 1.0f, fNormValue, fNormStrength);
  // per-sample gain change; iSamples>=4 here, so the divisor is never zero
  const FLOAT fNormAdd = (fNormValue-fLastNormValue) / (iSamples/4);

  // normalize (and convert to 16-bit)
  SWORD *pswDst = (SWORD*)pvMixerBuffer;
  FLOAT fCurrentNormValue = fLastNormValue;
  for( i=0; i<iSamples; i++) {
    SLONG slSample = FloatToInt(pslSrc[i]*fCurrentNormValue);
    pswDst[i] = (SWORD)Clamp( slSample, -32767, +32767);
    fCurrentNormValue = fCurrentNormValue+fNormAdd; // interpolate normalizer
    // clamp interpolated value once it has reached the target
    if( fCurrentNormValue<fNormValue && fNormAdd<0) fCurrentNormValue = fNormValue;
    else if( fCurrentNormValue>fNormValue && fNormAdd>0) fCurrentNormValue = fNormValue;
  }

  // remember normalization value
  fLastNormValue = fCurrentNormValue;
}
#ifdef __GNU_INLINE__
// These are implemented in an external NASM file.
// MixStereo() and MixMono() call them when neither USE_PORTABLE_C nor
// __MSVC_INLINE__ is defined; they are declared with C linkage so the names
// are not mangled.
extern "C" {
void MixStereo_asm(CSoundObject *pso);
void MixMono_asm(CSoundObject *pso);
}
#endif
// mixes one mono 16-bit signed sound to destination buffer
//
// All mixing parameters are taken from the file-scope mixer variables that
// MixSound() prepares before the call:
//   pswSrcBuffer / slSoundBufferSize  - source samples and their count
//   pvMixerBuffer / slMixerBufferSize - 32-bit stereo destination and frame count
//   fLeftOfs, fRightOfs               - start offsets inside the source (in samples)
//   fLeftStep, fRightStep             - source advance per destination frame
//   slLeftVolume, slRightVolume       - start volumes (volume in the upper 16 bits)
//   mmVolumeGain                      - per-frame volume change, two 32-bit values
//                                       (left in the low half, right in the high half)
//   slLeftFilter, slRightFilter       - low-pass filter strengths
//   slLastLeftSample/slLastRightSample- filter state, updated on return
//   mmSurroundFactor                  - 16-bit XOR masks (left in bits 0-15,
//                                       right in bits 16-31)
//   bNotLoop / bEndOfSound            - bEndOfSound is set to bNotLoop when an
//                                       offset runs past the end of the source
// On return fixLeftOfs/fixRightOfs hold the offsets reached (16 fractional bits).
//
//  pso - only forwarded to the external assembly implementation.
inline void MixMono( CSoundObject *pso)
{
  _pfSoundProfile.StartTimer(CSoundProfile::PTI_RAWMIXER);

#if (defined USE_PORTABLE_C)
  // initialize some local vars
  SLONG slLeftSample, slRightSample, slNextSample;
  SLONG *pslDstBuffer = (SLONG*)pvMixerBuffer;
  fixLeftOfs  = (__int64)(fLeftOfs  * 65536.0);
  fixRightOfs = (__int64)(fRightOfs * 65536.0);
  const __int64 fixLeftStep  = (__int64)(fLeftStep  * 65536.0);
  const __int64 fixRightStep = (__int64)(fRightStep * 65536.0);
  const __int64 fixSoundBufferSize = ((__int64)slSoundBufferSize)<<16;

  // Sign-extend each channel's 16-bit surround mask separately. Extending the
  // whole factor to 64 bits would invert BOTH channels, while the assembly
  // mixer (pxor on packed 16-bit samples) inverts only the masked channel.
  const SLONG slLeftSurround  = (SLONG)(SWORD)((mmSurroundFactor>> 0)&0xFFFF);
  const SLONG slRightSurround = (SLONG)(SWORD)((mmSurroundFactor>>16)&0xFFFF);

  // per-frame volume gains, unpacked the way MixSound() packs them
  const SLONG slLeftGain  = (SLONG)((mmVolumeGain>> 0)&0xFFFFFFFF);
  const SLONG slRightGain = (SLONG)((mmVolumeGain>>32)&0xFFFFFFFF);

  // loop thru source buffer
  INDEX iCt = slMixerBufferSize;
  FOREVER
  {
    // if left channel source sample came to end of sample buffer
    if( fixLeftOfs >= fixSoundBufferSize) {
      fixLeftOfs -= fixSoundBufferSize;
      // if has no loop, end it
      bEndOfSound = bNotLoop;
    }
    // if right channel source sample came to end of sample buffer
    if( fixRightOfs >= fixSoundBufferSize) {
      fixRightOfs -= fixSoundBufferSize;
      // if has no loop, end it
      bEndOfSound = bNotLoop;
    }
    // end of buffer?
    if( iCt<=0 || bEndOfSound) break;

    // fetch one linearly interpolated sample on left channel
    slLeftSample = pswSrcBuffer[(fixLeftOfs>>16)+0];
    slNextSample = pswSrcBuffer[(fixLeftOfs>>16)+1];
    slLeftSample = (slLeftSample*(65535-(fixLeftOfs&65535)) + slNextSample*(fixLeftOfs&65535)) >>16;
    // fetch one linearly interpolated sample on right channel
    slRightSample = pswSrcBuffer[(fixRightOfs>>16)+0];
    slNextSample  = pswSrcBuffer[(fixRightOfs>>16)+1];
    slRightSample = (slRightSample*(65535-(fixRightOfs&65535)) + slNextSample*(fixRightOfs&65535)) >>16;

    // filter samples
    slLastLeftSample  += ((slLeftSample -slLastLeftSample) *slLeftFilter) >>15;
    slLastRightSample += ((slRightSample-slLastRightSample)*slRightFilter)>>15;

    // apply stereo volume to current sample; the volume is re-read every frame
    // so that the gain added below really ramps it across the buffer
    slLeftSample  = (slLastLeftSample  * (slLeftVolume >>16)) >>15;
    slRightSample = (slLastRightSample * (slRightVolume>>16)) >>15;
    slLeftSample  ^= slLeftSurround;
    slRightSample ^= slRightSurround;

    // mix in current sample
    slLeftSample  += pslDstBuffer[0];
    slRightSample += pslDstBuffer[1];
    // NOTE(review): the assembly mixers accumulate into the 32-bit buffer
    // without this clamp - confirm whether clipping the running sum is intended
    // upper clamp
    if( slLeftSample  > MAX_SWORD) slLeftSample  = MAX_SWORD;
    if( slRightSample > MAX_SWORD) slRightSample = MAX_SWORD;
    // lower clamp
    if( slLeftSample  < MIN_SWORD) slLeftSample  = MIN_SWORD;
    if( slRightSample < MIN_SWORD) slRightSample = MIN_SWORD;
    // store samples (both channels)
    pslDstBuffer[0] = slLeftSample;
    pslDstBuffer[1] = slRightSample;

    // modify volume
    slLeftVolume  += slLeftGain;
    slRightVolume += slRightGain;

    // advance to next sample
    fixLeftOfs  += fixLeftStep;
    fixRightOfs += fixRightStep;
    pslDstBuffer += 2;
    iCt--;
  }

#elif (defined __MSVC_INLINE__)
  __asm {
    // convert from floats to fixints 32:16
    fld D [fLeftOfs]
    fmul D [f65536]
    fld D [fRightOfs]
    fmul D [f65536]
    fld D [fLeftStep]
    fmul D [f65536]
    fld D [fRightStep]
    fmul D [f4G]
    fistp Q [mmRightStep] // fixint 32:32
    fistp Q [mmLeftStep] // fixint 32:16
    fistp Q [fixRightOfs] // fixint 32:16
    fistp Q [fixLeftOfs] // fixint 32:16
    // get last played sample (for filtering purposes)
    movzx eax,W [slLastRightSample]
    movzx edx,W [slLastLeftSample]
    shl eax,16
    or eax,edx
    movd mm6,eax // MM6 = 0 | 0 || lastRightSample | lastLeftSample
    // get volume
    movd mm5,D [slRightVolume]
    movd mm0,D [slLeftVolume]
    psllq mm5,32
    por mm5,mm0 // MM5 = rightVolume || leftVolume
    // get filter
    mov eax,D [slRightFilter]
    mov edx,D [slLeftFilter]
    shl eax,16
    or eax,edx
    movd mm7,eax // MM7 = 0 | 0 || rightFilter | leftFilter
    // get offset of each channel inside sound and loop thru destination buffer
    mov W [mmRightStep],0
    movzx eax,W [fixLeftOfs]
    movzx edx,W [fixRightOfs]
    shl edx,16
    or eax,edx // EAX = right ofs frac | left ofs frac
    mov ebx,D [fixLeftOfs+2] // EBX = left ofs int
    mov edx,D [fixRightOfs+2] // EDX = right ofs int
    mov esi,D [pswSrcBuffer] // ESI = source sound buffer start ptr
    mov edi,D [pvMixerBuffer] // EDI = mixer buffer ptr
    mov ecx,D [slMixerBufferSize] // ECX = samples counter
sampleLoop:
    // check if source offsets came to the end of source sound buffer
    cmp ebx,D [slSoundBufferSize]
    jl lNotEnd
    sub ebx,D [slSoundBufferSize]
    push D [bNotLoop]
    pop D [bEndOfSound]
lNotEnd:
    // same for right channel
    cmp edx,D [slSoundBufferSize]
    jl rNotEnd
    sub edx,D [slSoundBufferSize]
    push D [bNotLoop]
    pop D [bEndOfSound]
rNotEnd:
    // check end of sample
    cmp ecx,0
    jle loopEnd
    cmp D [bEndOfSound],TRUE
    je loopEnd
    // get sound samples
    movd mm1,D [esi+ ebx*2] // MM1 = 0 | 0 || nextLeftSample | leftSample
    movd mm2,D [esi+ edx*2] // MM2 = 0 | 0 || nextRightSample | RightSample
    psllq mm2,32
    por mm1,mm2 // MM1 = nextRightSample | rightSample || nextLeftSample | leftSample
    // calc linear interpolation factor (strength)
    movd mm3,eax // MM3 = 0 | 0 || right frac | left frac
    punpcklwd mm3,mm3
    psrlw mm3,1 // MM3 = rightFrac | rightFrac || leftFrac | leftFrac
    pxor mm3,Q [mmInvFactor] // MM3 = rightFrac | 1-rightFrac || leftFrac | 1-leftFrac
    // apply linear interpolation
    pmaddwd mm1,mm3
    psrad mm1,15
    packssdw mm1,mm1 // MM1 = ? | ? || linearRightSample | linearLeftSample
    // apply filter
    psubsw mm1,mm6
    pmulhw mm1,mm7
    psllw mm1,1
    paddsw mm1,mm6
    movq mm6,mm1
    // apply volume adjustment
    movq mm0,mm5
    psrad mm0,16
    packssdw mm0,mm0
    pmulhw mm1,mm0
    psllw mm1,1
    pxor mm1,Q [mmSurroundFactor]
    paddd mm5,Q [mmVolumeGain] // modify volume
    // unpack to 32bit and mix it into destination buffer
    punpcklwd mm1,mm1
    psrad mm1,16 // MM1 = finalRightSample || finalLeftSample
    paddd mm1,Q [edi]
    movq Q [edi],mm1
    // advance to next samples in source sound
    add eax,D [mmRightStep+0]
    adc edx,D [mmRightStep+4]
    add ax,W [mmLeftStep +0]
    adc ebx,D [mmLeftStep +2]
    add edi,8
    dec ecx
    jmp sampleLoop
loopEnd:
    // store modified asm local vars
    mov D [fixLeftOfs +0],eax
    shr eax,16
    mov D [fixRightOfs+0],eax
    mov D [fixLeftOfs +2],ebx
    mov D [fixRightOfs+2],edx
    movd eax,mm6
    mov edx,eax
    and eax,0x0000FFFF
    shr edx,16
    mov D [slLastLeftSample],eax
    mov D [slLastRightSample],edx
    emms
  }

#elif (defined __GNU_INLINE__)
  // This is implemented in an external NASM file.
  MixMono_asm(pso);

#else
  #error please write inline asm for your platform.
#endif

  _pfSoundProfile.StopTimer(CSoundProfile::PTI_RAWMIXER);
}
// mixes one stereo 16-bit signed sound to destination buffer
//
// The source holds interleaved frames (left sample, right sample). All mixing
// parameters are taken from the file-scope mixer variables that MixSound()
// prepares before the call:
//   pswSrcBuffer / slSoundBufferSize  - source samples and the number of frames
//   pvMixerBuffer / slMixerBufferSize - 32-bit stereo destination and frame count
//   fLeftOfs, fRightOfs               - start offsets inside the source (in frames)
//   fLeftStep, fRightStep             - source advance per destination frame
//   slLeftVolume, slRightVolume       - start volumes (volume in the upper 16 bits)
//   mmVolumeGain                      - per-frame volume change, two 32-bit values
//                                       (left in the low half, right in the high half)
//   slLeftFilter, slRightFilter       - low-pass filter strengths
//   slLastLeftSample/slLastRightSample- filter state, updated on return
//   mmSurroundFactor                  - 16-bit XOR masks (left in bits 0-15,
//                                       right in bits 16-31)
//   bNotLoop / bEndOfSound            - bEndOfSound is set to bNotLoop when an
//                                       offset runs past the end of the source
// On return fixLeftOfs/fixRightOfs hold the offsets reached (16 fractional bits).
//
//  pso - only forwarded to the external assembly implementation.
inline void MixStereo( CSoundObject *pso)
{
  _pfSoundProfile.StartTimer(CSoundProfile::PTI_RAWMIXER);

#if (defined USE_PORTABLE_C)
  // initialize some local vars
  SLONG slLeftSample, slRightSample, slNextSample;
  SLONG *pslDstBuffer = (SLONG*)pvMixerBuffer;
  fixLeftOfs  = (__int64)(fLeftOfs  * 65536.0);
  fixRightOfs = (__int64)(fRightOfs * 65536.0);
  const __int64 fixLeftStep  = (__int64)(fLeftStep  * 65536.0);
  const __int64 fixRightStep = (__int64)(fRightStep * 65536.0);
  const __int64 fixSoundBufferSize = ((__int64)slSoundBufferSize)<<16;

  // Sign-extend each channel's 16-bit surround mask separately. Extending the
  // whole factor to 64 bits would invert BOTH channels, while the assembly
  // mixer (pxor on packed 16-bit samples) inverts only the masked channel.
  const SLONG slLeftSurround  = (SLONG)(SWORD)((mmSurroundFactor>> 0)&0xFFFF);
  const SLONG slRightSurround = (SLONG)(SWORD)((mmSurroundFactor>>16)&0xFFFF);

  // per-frame volume gains, unpacked the way MixSound() packs them
  const SLONG slLeftGain  = (SLONG)((mmVolumeGain>> 0)&0xFFFFFFFF);
  const SLONG slRightGain = (SLONG)((mmVolumeGain>>32)&0xFFFFFFFF);

  // loop thru source buffer
  INDEX iCt = slMixerBufferSize;
  FOREVER
  {
    // if left channel source sample came to end of sample buffer
    if( fixLeftOfs >= fixSoundBufferSize) {
      fixLeftOfs -= fixSoundBufferSize;
      // if has no loop, end it
      bEndOfSound = bNotLoop;
    }
    // if right channel source sample came to end of sample buffer
    if( fixRightOfs >= fixSoundBufferSize) {
      fixRightOfs -= fixSoundBufferSize;
      // if has no loop, end it
      bEndOfSound = bNotLoop;
    }
    // end of buffer?
    if( iCt<=0 || bEndOfSound) break;

    // Index of the first 16-bit word of the current frame for each channel.
    // The whole-frame part of the offset is doubled; shifting by 15 instead
    // would leak the top fraction bit into the index and land on the other
    // channel's sample whenever the fraction is >= 0.5.
    const __int64 iLeftFrame  = (fixLeftOfs >>16)*2;
    const __int64 iRightFrame = (fixRightOfs>>16)*2;

    // fetch one linearly interpolated sample on left channel (even words)
    slLeftSample = pswSrcBuffer[iLeftFrame+0];
    slNextSample = pswSrcBuffer[iLeftFrame+2];
    slLeftSample = (slLeftSample*(65535-(fixLeftOfs&65535)) + slNextSample*(fixLeftOfs&65535)) >>16;
    // fetch one linearly interpolated sample on right channel (odd words)
    slRightSample = pswSrcBuffer[iRightFrame+1];
    slNextSample  = pswSrcBuffer[iRightFrame+3];
    slRightSample = (slRightSample*(65535-(fixRightOfs&65535)) + slNextSample*(fixRightOfs&65535)) >>16;

    // filter samples
    slLastLeftSample  += ((slLeftSample -slLastLeftSample) *slLeftFilter) >>15;
    slLastRightSample += ((slRightSample-slLastRightSample)*slRightFilter)>>15;

    // apply stereo volume to current sample; the volume is re-read every frame
    // so that the gain added below really ramps it across the buffer
    slLeftSample  = (slLastLeftSample  * (slLeftVolume >>16)) >>15;
    slRightSample = (slLastRightSample * (slRightVolume>>16)) >>15;
    slLeftSample  ^= slLeftSurround;
    slRightSample ^= slRightSurround;

    // mix in current sample
    slLeftSample  += pslDstBuffer[0];
    slRightSample += pslDstBuffer[1];
    // NOTE(review): the assembly mixers accumulate into the 32-bit buffer
    // without this clamp - confirm whether clipping the running sum is intended
    // upper clamp
    if( slLeftSample  > MAX_SWORD) slLeftSample  = MAX_SWORD;
    if( slRightSample > MAX_SWORD) slRightSample = MAX_SWORD;
    // lower clamp
    if( slLeftSample  < MIN_SWORD) slLeftSample  = MIN_SWORD;
    if( slRightSample < MIN_SWORD) slRightSample = MIN_SWORD;
    // store samples (both channels)
    pslDstBuffer[0] = slLeftSample;
    pslDstBuffer[1] = slRightSample;

    // modify volume
    slLeftVolume  += slLeftGain;
    slRightVolume += slRightGain;

    // advance to next sample
    fixLeftOfs  += fixLeftStep;
    fixRightOfs += fixRightStep;
    pslDstBuffer += 2;
    iCt--;
  }

#elif (defined __MSVC_INLINE__)
  __asm {
    // convert from floats to fixints 32:16
    fld D [fLeftOfs]
    fmul D [f65536]
    fld D [fRightOfs]
    fmul D [f65536]
    fld D [fLeftStep]
    fmul D [f65536]
    fld D [fRightStep]
    fmul D [f4G]
    fistp Q [mmRightStep] // fixint 32:32
    fistp Q [mmLeftStep] // fixint 32:16
    fistp Q [fixRightOfs] // fixint 32:16
    fistp Q [fixLeftOfs] // fixint 32:16
    // get last played sample (for filtering purposes)
    movzx eax,W [slLastRightSample]
    movzx edx,W [slLastLeftSample]
    shl eax,16
    or eax,edx
    movd mm6,eax // MM6 = 0 | 0 || lastRightSample | lastLeftSample
    // get volume
    movd mm5,D [slRightVolume]
    movd mm0,D [slLeftVolume]
    psllq mm5,32
    por mm5,mm0 // MM5 = rightVolume || leftVolume
    // get filter
    mov eax,D [slRightFilter]
    mov edx,D [slLeftFilter]
    shl eax,16
    or eax,edx
    movd mm7,eax // MM7 = 0 | 0 || rightFilter | leftFilter
    // get offset of each channel inside sound and loop thru destination buffer
    mov W [mmRightStep],0
    movzx eax,W [fixLeftOfs]
    movzx edx,W [fixRightOfs]
    shl edx,16
    or eax,edx // EAX = right ofs frac | left ofs frac
    mov ebx,D [fixLeftOfs+2] // EBX = left ofs int
    mov edx,D [fixRightOfs+2] // EDX = right ofs int
    mov esi,D [pswSrcBuffer] // ESI = source sound buffer start ptr
    mov edi,D [pvMixerBuffer] // EDI = mixer buffer ptr
    mov ecx,D [slMixerBufferSize] // ECX = samples counter
sampleLoop:
    // check if source offsets came to the end of source sound buffer
    cmp ebx,D [slSoundBufferSize]
    jl lNotEnd
    sub ebx,D [slSoundBufferSize]
    push D [bNotLoop]
    pop D [bEndOfSound]
lNotEnd:
    // same for right channel
    cmp edx,D [slSoundBufferSize]
    jl rNotEnd
    sub edx,D [slSoundBufferSize]
    push D [bNotLoop]
    pop D [bEndOfSound]
rNotEnd:
    // check end of sample
    cmp ecx,0
    jle loopEnd
    cmp D [bEndOfSound],TRUE
    je loopEnd
    // get sound samples
    movq mm1,Q [esi+ ebx*4]
    movq mm2,Q [esi+ edx*4]
    pslld mm1,16
    psrad mm1,16 // MM1 = 0 | nextLeftSample || 0 | leftSample
    psrad mm2,16 // MM2 = 0 | nextRightSample || 0 | rightSample
    packssdw mm1,mm2 // MM1 = nextRightSample | rightSample || nextLeftSample | leftSample
    // calc linear interpolation factor (strength)
    movd mm3,eax // MM3 = 0 | 0 || right frac | left frac
    punpcklwd mm3,mm3
    psrlw mm3,1 // MM3 = rightFrac | rightFrac || leftFrac | leftFrac
    pxor mm3,Q [mmInvFactor] // MM3 = rightFrac | 1-rightFrac || leftFrac | 1-leftFrac
    // apply linear interpolation
    pmaddwd mm1,mm3
    psrad mm1,15
    packssdw mm1,mm1 // MM1 = ? | ? || linearRightSample | linearLeftSample
    // apply filter
    psubsw mm1,mm6
    pmulhw mm1,mm7
    psllw mm1,1
    paddsw mm1,mm6
    movq mm6,mm1
    // apply volume adjustment
    movq mm0,mm5
    psrad mm0,16
    packssdw mm0,mm0
    pmulhw mm1,mm0
    psllw mm1,1
    pxor mm1,Q [mmSurroundFactor]
    paddd mm5,Q [mmVolumeGain] // modify volume
    // unpack to 32bit and mix it into destination buffer
    punpcklwd mm1,mm1
    psrad mm1,16 // MM1 = finalRightSample || finalLeftSample
    paddd mm1,Q [edi]
    movq Q [edi],mm1
    // advance to next samples in source sound
    add eax,D [mmRightStep+0]
    adc edx,D [mmRightStep+4]
    add ax,W [mmLeftStep +0]
    adc ebx,D [mmLeftStep +2]
    add edi,8
    dec ecx
    jmp sampleLoop
loopEnd:
    // store modified asm local vars
    mov D [fixLeftOfs +0],eax
    shr eax,16
    mov D [fixRightOfs+0],eax
    mov D [fixLeftOfs +2],ebx
    mov D [fixRightOfs+2],edx
    movd eax,mm6
    mov edx,eax
    and eax,0x0000FFFF
    shr edx,16
    mov D [slLastLeftSample],eax
    mov D [slLastRightSample],edx
    emms
  }

#elif (defined __GNU_INLINE__)
  // This is implemented in an external NASM file.
  MixStereo_asm(pso);

#else
  #error please write inline asm for your platform.
#endif

  _pfSoundProfile.StopTimer(CSoundProfile::PTI_RAWMIXER);
}
// mixes one sound to destination buffer
//
// Steps performed for the given sound object:
//  1. bail out for encoded sounds without an open decoder and for formats other
//     than 16-bit mono/stereo
//  2. account for the start delay of the sound
//  3. compute the source step per output sample and the old/new channel volumes
//  4. if all volumes are (almost) zero, skip the mixing and only advance offsets
//  5. otherwise fill the file-scope mixer variables, decode encoded data into
//     the temporary decode buffer, apply the phase shift and call
//     MixStereo()/MixMono()
//  6. write the reached offsets, last samples and volumes back to the sound object
//
//  pso - sound object to mix; its playing state is updated in place
void MixSound( CSoundObject *pso)
{
psd = pso->so_pCsdLink;
// don't mix encoded sounds if they are not opened properly
if((psd->sd_ulFlags&SDF_ENCODED) &&
(pso->so_psdcDecoder==NULL || !pso->so_psdcDecoder->IsOpen()) ) {
return;
}
// check for supported sound formats
const SLONG slChannels = pso->so_pCsdLink->sd_wfeFormat.nChannels;
const SLONG slBytes = pso->so_pCsdLink->sd_wfeFormat.wBitsPerSample/8;
// unsupported sound formats will be ignored
if( (slChannels!=1 && slChannels!=2) || slBytes!=2) return;
// check for delay
// (the duration of one mixer buffer is added on every call until the
// accumulated time reaches the requested delay)
const FLOAT f1oMixerBufferSampleRate = 1.0f / slMixerBufferSampleRate;
const FLOAT fSecondsToMix = (FLOAT)slMixerBufferSize * f1oMixerBufferSampleRate;
pso->so_fDelayed += fSecondsToMix;
if( pso->so_fDelayed < pso->so_sp.sp_fDelay) {
_pfSoundProfile.IncrementCounter(CSoundProfile::PCI_SOUNDSDELAYED, 1);
return;
}
// playing started, so skip further delays
pso->so_fDelayed = 9999.9999f;
// reach sound data and determine sound step, sound buffer and buffer size
pswSrcBuffer = psd->sd_pswBuffer;
fSoundSampleRate = psd->sd_wfeFormat.nSamplesPerSec * pso->so_sp.sp_fPitchShift;
// source samples consumed per output sample
fStep = fSoundSampleRate * f1oMixerBufferSampleRate;
fLeftStep = fStep;
fRightStep = fStep;
slSoundBufferSize = psd->sd_slBufferSampleSize;
// eliminate potential click ("puck") at the end of a sample that doesn't loop
if( !(pso->so_slFlags&SOF_LOOP) && slSoundBufferSize>1) slSoundBufferSize--;
// get old and new volumes
FLOAT fLeftVolume = ClampDn( pso->so_fLastLeftVolume, 0.0f);
FLOAT fRightVolume = ClampDn( pso->so_fLastRightVolume, 0.0f);
FLOAT fNewLeftVolume = ClampDn( pso->so_sp.sp_fLeftVolume, 0.0f);
FLOAT fNewRightVolume = ClampDn( pso->so_sp.sp_fRightVolume, 0.0f);
// adjust for master volume
if(pso->so_slFlags&SOF_MUSIC) {
fNewLeftVolume *= snd_fMusicVolume;
fNewRightVolume *= snd_fMusicVolume;
} else {
fNewLeftVolume *= snd_fSoundVolume;
fNewRightVolume *= snd_fSoundVolume;
}
// if both channel volumes are too low
if( fLeftVolume<0.001f && fRightVolume<0.001f && fNewLeftVolume<0.001f && fNewRightVolume<0.001f)
{
// if this is not an encoded sound
if( !(psd->sd_ulFlags&SDF_ENCODED) ) {
// skip mixing of this sample segment
// (advance the offsets by the number of source samples this buffer covers)
fOfsDelta = fStep*slMixerBufferSampleRate*fSecondsToMix;
pso->so_fLeftOffset += fOfsDelta;
pso->so_fRightOffset += fOfsDelta;
const FLOAT fMinOfs = Min( pso->so_fLeftOffset, pso->so_fRightOffset);
ASSERT( fMinOfs>=0);
if( fMinOfs<0) CPrintF( "BUG: negative offset (%.2g) encountered in sound: '%s' !\n", fMinOfs, (const char *) (CTString&)psd->GetName());
// if looping
if (pso->so_slFlags & SOF_LOOP) {
// adjust offset ptrs inside sound
// NOTE(review): these loops only terminate if slSoundBufferSize is positive
while( pso->so_fLeftOffset < 0) pso->so_fLeftOffset += slSoundBufferSize;
while( pso->so_fRightOffset < 0) pso->so_fRightOffset += slSoundBufferSize;
while( pso->so_fLeftOffset >= slSoundBufferSize) pso->so_fLeftOffset -= slSoundBufferSize;
while( pso->so_fRightOffset >= slSoundBufferSize) pso->so_fRightOffset -= slSoundBufferSize;
// if not looping
} else {
// no more playing
pso->so_slFlags &= ~SOF_PLAY;
pso->so_fDelayed = 0.0f;
pso->so_sp.sp_fDelay = 0.0f;
}
}
// reset last samples
pso->so_swLastLeftSample = 0;
pso->so_swLastRightSample = 0;
// update volume
pso->so_fLastLeftVolume = fNewLeftVolume;
pso->so_fLastRightVolume = fNewRightVolume;
_pfSoundProfile.IncrementCounter(CSoundProfile::PCI_SOUNDSSKIPPED, 1);
return;
}
_sfStats.IncrementCounter(CStatForm::SCI_SOUNDSMIXING);
// cache sound object vars
fPhase = pso->so_sp.sp_fPhaseShift;
fLeftOfs = pso->so_fLeftOffset;
fRightOfs = pso->so_fRightOffset;
fOfsDelta = pso->so_fOffsetDelta;
// start volumes as fixed-point integers (volume 1.0 maps to 65536*32767)
slLeftVolume = FloatToInt(fLeftVolume * 65536*32767.0f);
slRightVolume = FloatToInt(fRightVolume * 65536*32767.0f);
// per-sample volume change that reaches the new volumes at the end of the buffer
const FLOAT fMixBufSize = 65536*32767.0f / slMixerBufferSize;
const SLONG slLeftGain = FloatToInt( (fNewLeftVolume -fLeftVolume) *fMixBufSize);
const SLONG slRightGain = FloatToInt( (fNewRightVolume-fRightVolume) *fMixBufSize);
// pack both gains: right gain in the upper 32 bits, left gain in the lower 32 bits
mmVolumeGain = ((__int64)(slRightGain)<<32) | ((__int64)(slLeftGain)&0xFFFFFFFF);
// extrapolate back new volumes because of not enough precision in interpolation!
// (otherwise we might hear occasional pucks)
if( fNewLeftVolume >0.001f) fNewLeftVolume = (slLeftVolume + slLeftGain *slMixerBufferSize) /(65536*32767.0f);
if( fNewRightVolume>0.001f) fNewRightVolume = (slRightVolume + slRightGain*slMixerBufferSize) /(65536*32767.0f);
//ASSERT( fNewLeftVolume>=0 && fNewRightVolume>=0);
//CPrintF( "NV: %.4f / %.4f, GV: %.4f / %.4f\n", fNewLeftVolume,fNewRightVolume, fLeftGainedVolume,fRightGainedVolume);
// determine filtering and surround
slLeftFilter = pso->so_sp.sp_slLeftFilter;
slRightFilter = pso->so_sp.sp_slRightFilter;
bNotLoop = !(pso->so_slFlags & SOF_LOOP);
// surround sounds get an all-ones mask in the lowest 16 bits only
mmSurroundFactor = 0;
if( pso->so_slFlags & SOF_SURROUND) mmSurroundFactor = 0x0000FFFF;
// if this is an encoded sound
BOOL bDecodingFinished = FALSE;
if( psd->sd_ulFlags&SDF_ENCODED) {
_pfSoundProfile.StartTimer(CSoundProfile::PTI_DECODESOUND);
// decode some samples from it
SLONG slWantedBytes = FloatToInt(slMixerBufferSize*fStep*pso->so_pCsdLink->sd_wfeFormat.nChannels) *2;
void *pvDecodeBuffer = _pSound->sl_pswDecodeBuffer;
ASSERT(slWantedBytes<=_pSound->sl_slDecodeBufferSize);
SLONG slDecodedBytes = pso->so_psdcDecoder->Decode( pvDecodeBuffer, slWantedBytes);
ASSERT(slDecodedBytes<=slWantedBytes);
// if it has a loop
if (!bNotLoop) {
// if sound is shorter than buffer
// NOTE(review): this loop relies on Decode() returning more than zero bytes
// after Reset() - confirm against the decoder implementation
while(slDecodedBytes<slWantedBytes) {
// decode it again and again
pso->so_psdcDecoder->Reset();
slDecodedBytes += pso->so_psdcDecoder->Decode( ((UBYTE*)pvDecodeBuffer) +
slDecodedBytes, slWantedBytes-slDecodedBytes);
}
// if it doesn't have a loop
} else {
// if sound is shorter than buffer
if(slDecodedBytes<slWantedBytes) {
// mark that it is finished
bDecodingFinished = TRUE;
}
}
// copy first sample to the last one (this is needed for linear interpolation)
// (writes 4 bytes right after the decoded data)
(ULONG&)(((UBYTE*)pvDecodeBuffer)[slDecodedBytes]) = *(ULONG*)pvDecodeBuffer;
// fix some mixer variables to play temporary decode buffer instead of real sound
pswSrcBuffer = (SWORD*)pvDecodeBuffer;
// NOTE(review): the shift assumes 4 bytes per sample frame (16-bit stereo) -
// confirm that encoded sounds are never mono
slSoundBufferSize = slDecodedBytes>>2; // convert to samples
fLeftOfs = 0.0f;
fRightOfs = 0.0f;
fPhase = 0.0f;
_pfSoundProfile.StopTimer(CSoundProfile::PTI_DECODESOUND);
}
_pfSoundProfile.IncrementCounter(CSoundProfile::PCI_SOUNDSMIXED, 1);
_pfSoundProfile.IncrementCounter(CSoundProfile::PCI_SAMPLES, slMixerBufferSize);
_pfSoundProfile.StartTimer(CSoundProfile::PTI_MIXSOUND);
slLastLeftSample = pso->so_swLastLeftSample;
slLastRightSample = pso->so_swLastRightSample;
// calculate eventual new offsets from phase shift
// (the change in phase is spread over the buffer by slowing one channel down
// by at most half of its step and speeding the other one up)
FLOAT fLastPhase = fOfsDelta / fSoundSampleRate;
FLOAT fPhaseDelta = fPhase - fLastPhase;
FLOAT fStepDelta = Abs( fPhaseDelta*fSoundSampleRate / slMixerBufferSize);
FLOAT fStepDeltaL, fStepDeltaR;
if( fPhaseDelta>0) {
fStepDeltaL = fStepDelta/2;
if( fStepDeltaL>fLeftStep/2) fStepDeltaL = fLeftStep/2;
fStepDeltaL = -fStepDeltaL;
fStepDeltaR = fStepDelta + fStepDeltaL;
} else {
fStepDeltaR = fStepDelta/2;
// (fLeftStep and fRightStep still hold the same value at this point)
if( fStepDeltaR>fLeftStep/2) fStepDeltaR = fLeftStep/2;
fStepDeltaR = -fStepDeltaR;
fStepDeltaL = fStepDelta + fStepDeltaR;
}
fLeftStep += fStepDeltaL;
fRightStep += fStepDeltaR;
fStepDelta = fStepDeltaR-fStepDeltaL;
// if there is anything to mix (could be nothing when encoded file just finished)
if( slSoundBufferSize>0) {
// safety check (needed because of bad-bug!)
FLOAT fMinOfs = Min( fLeftOfs, fRightOfs);
ASSERT( fMinOfs>=0);
if( fMinOfs<0) CPrintF( "BUG: negative offset (%.2g) encountered in sound: '%s' !\n", fMinOfs, (const char *) (CTString&)psd->GetName());
// adjust offset ptrs inside sound to match those of phase shift
while( fLeftOfs < 0) fLeftOfs += slSoundBufferSize;
while( fRightOfs < 0) fRightOfs += slSoundBufferSize;
while( fLeftOfs >= slSoundBufferSize) fLeftOfs -= slSoundBufferSize;
while( fRightOfs >= slSoundBufferSize) fRightOfs -= slSoundBufferSize;
// if mono output is required
if( snd_bMono) {
// monomize channels (cool word:)
fLeftOfs = (fLeftOfs+fRightOfs)/2;
fRightOfs = fLeftOfs;
fLeftStep = (fLeftStep+fRightStep)/2;
fRightStep = fLeftStep;
slLeftVolume = (slLeftVolume+slRightVolume)/2;
slRightVolume = slLeftVolume;
slLeftFilter = (slLeftFilter+slRightFilter)/2;
slRightFilter = slLeftFilter;
}
// call corresponding mixer routine for current sound format
bEndOfSound = FALSE;
if( slChannels==2) {
// mix as 16-bit stereo
MixStereo( pso);
} else {
// mix as 16-bit mono
MixMono( pso);
}
}
// if encoded sound
if( psd->sd_ulFlags&SDF_ENCODED) {
// ignore mixing finished flag, but use decoding finished flag
bEndOfSound = bDecodingFinished;
}
// if sound ended, not buffer
if( bEndOfSound) {
// reset some sound vars
slLastLeftSample = 0;
slLastRightSample = 0;
pso->so_slFlags &= ~SOF_PLAY;
pso->so_fDelayed = 0.0f;
pso->so_sp.sp_fDelay = 0.0f;
}
// remember last samples for the next mix in
pso->so_swLastLeftSample = (SWORD)slLastLeftSample;
pso->so_swLastRightSample = (SWORD)slLastRightSample;
// determine new phase shift offset
pso->so_fOffsetDelta += fStepDelta*slMixerBufferSize;
// update play offset for the next mix iteration
// NOTE(review): in this file fixLeftOfs/fixRightOfs are only written by the
// mixing routines, so when nothing was mixed above (slSoundBufferSize<=0) they
// appear to still hold values from an earlier call - confirm this is harmless
pso->so_fLeftOffset = fixLeftOfs * (1.0f/65536.0f);
pso->so_fRightOffset = fixRightOfs * (1.0f/65536.0f);
// update volume
pso->so_fLastLeftVolume = fNewLeftVolume;
pso->so_fLastRightVolume = fNewRightVolume;
//if( pso->so_fLastLeftVolume>0 || pso->so_fLastRightVolume>0 || fNewLeftVolume>0 || fNewRightVolume>0) {
// CPrintF( "SO: 0x%8X; OV: %.4f / %.4f, NV: %.4f / %.4f\n", pso,
// pso->so_fLastLeftVolume,pso->so_fLastRightVolume, fNewLeftVolume,fNewRightVolume);
//}
_pfSoundProfile.StopTimer(CSoundProfile::PTI_MIXSOUND);
}