/**********************************************************************

  Audacity: A Digital Audio Editor

  VoiceKey.cpp

  ?? Dominic Mazzoni
  ?? Shane Muller

*******************************************************************//*!

\class VoiceKey
\brief This implements a voice key, detecting either the next "ON"
or "OFF" point

*//*******************************************************************/

#include "Audacity.h"
#include "VoiceKey.h"
// NOTE(review): the header names of the nine includes below are missing in
// this copy of the file; restore them from the upstream source.
#include
#include
#include
#include
#include
#include
#include
#include
#include

using std::cout;
using std::endl;

// Builds a voice key with default settings: energy is the only enabled
// statistic, and the thresholds are derived from the baseline values below
// with an adjustment factor of 2.
VoiceKey::VoiceKey()
{
   //Statistics used for detection
   mUseEnergy = true;
   mUseSignChangesLow = false;
   mUseSignChangesHigh = false;
   mUseDirectionChangesLow = false;
   mUseDirectionChangesHigh = false;

   //Window sizes, all in seconds
   mWindowSize = 0.01;         //size of analysis window
   mSilentWindowSize = .05;    //time below threshold to call it silence
   mSignalWindowSize = .05;    //time above threshold to call it signal

   //Reasonable initial levels assuming a sampling rate of 44100 hertz
   mEnergyMean = .0006;
   mEnergySD = .0002;
   mSignChangesMean = .08;
   mSignChangesSD = .02;
   mDirectionChangesMean = .25;
   mDirectionChangesSD = .2;

   //Must come after the means and SDs are set: the thresholds are computed from them
   AdjustThreshold(2);
}

// Nothing to release.
VoiceKey::~VoiceKey()
{
}

//---------------------------------------------------------------------------
// VoiceKey::On/Off Forward/Backward
// This operates in two phases:
// First, you take chunks of samples that are WindowSize big.
// If you have a run of them where something passes the threshold for SignalWindowSize seconds,
// you return to the last empty block and scan forward one sample at a time until you find the
// starting point of the speech.

//Move forward to find an ON region.
// Scans forward from `start` for the point where the signal switches ON.
//
//   t     - track the samples are read from
//   start - first sample of the selection
//   len   - number of samples in the selection
//
// Returns the estimated position of the ON point, or `start` when the
// selection is too short or no long-enough run of above-threshold blocks
// was found.
sampleCount VoiceKey::OnForward (WaveTrack & t, sampleCount start, sampleCount len)
{
   //Change the second-based parameters into sample-based parameters
   double rate = t.GetRate();                                                    //Translates seconds to samples
   unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize);              //Size of window to examine
   unsigned int SignalWindowSizeInt = (unsigned int)(rate * mSignalWindowSize);  //This much signal is necessary to trip key

   //The block scan needs one window plus 10 samples. The check is made in
   //samples: comparing mWindowSize (seconds) against len never fired.
   if(len < (sampleCount)WindowSizeInt + 10) {
      wxMessageBox(_("Selection is too small to use voice key."));
      return start;
   }

   sampleCount lastsubthresholdsample = start;   //start of the last block that did not exceed the threshold
   int samplesleft = len - WindowSizeInt;        //number of samples remaining in the selection
   unsigned int i;                               //position in the track, later index into the local buffer
   int blockruns = 0;                            //number of consecutive above-threshold blocks
   int blocksize;                                //the final block may be smaller than WindowSizeInt

   //Phase 1: go through the selection a block at a time. If a long enough run
   //of above-threshold blocks occurs, stop and refine from the last
   //sub-threshold block. If fewer than 10 samples are left over, don't bother.
   for(i = start; samplesleft >= 10; i += (WindowSizeInt - 1), samplesleft -= (WindowSizeInt - 1)) {

      if((unsigned int)samplesleft < WindowSizeInt)
         blocksize = samplesleft;
      else
         blocksize = WindowSizeInt;

      if(AboveThreshold(t, i, blocksize)) {
         blockruns++;                    //Hit
      }
      else {
         blockruns = 0;                  //Miss--start over
         lastsubthresholdsample = i;
      }

      //If the blockrun is long enough, break out of the loop early:
      if(blockruns > mSignalWindowSize/mWindowSize)
         break;
   }

   //If we did not break out early (samplesleft > 10), nothing was found
   if(samplesleft <= 10)
      return start;

   //Phase 2: look more carefully, one sample at a time, starting at the last
   //sub-threshold block. One window is scanned, and a second window is
   //needed to draw incoming samples from.
   samplesleft = 2*WindowSizeInt+1;

   //Use a local buffer to avoid a costly t.Get() per sample
   sampleFormat *buffer = new sampleFormat[samplesleft];
   t.Get((samplePtr)buffer, floatSample, lastsubthresholdsample, samplesleft);

   //atrend and ztrend keep track of the up/down trends at the start and end
   //of the evaluation window.
   int atrend = sgn(buffer[1]-buffer[0]);
   int ztrend = sgn(buffer[WindowSizeInt+1]-buffer[WindowSizeInt]);

   double erg = 0;
   double sc = 0;
   double dc = 0;

   //Get initial test statistic values.
   if(mUseEnergy)
      erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
   if(mUseSignChangesLow || mUseSignChangesHigh)
      sc = TestSignChanges(t, lastsubthresholdsample, WindowSizeInt);
   if(mUseDirectionChangesLow || mUseDirectionChangesHigh)
      dc = TestDirectionChanges(t, lastsubthresholdsample, WindowSizeInt);

   //Each step reads up to buffer[i+WindowSizeInt+1], so i must stay below
   //WindowSizeInt to remain inside the buffer.
   //NOTE(review): the original loop header was lost from this copy of the
   //file; the SignalWindowSizeInt-based limit is an assumption -- confirm
   //against upstream.
   unsigned int scanLimit = WindowSizeInt;
   if(SignalWindowSizeInt > WindowSizeInt && SignalWindowSizeInt - WindowSizeInt < scanLimit)
      scanLimit = SignalWindowSizeInt - WindowSizeInt;

   for(i = 0; i < scanLimit; i++) {
      int tests = 0;
      int testThreshold = 0;

      //Update each statistic once per step, then apply the enabled tests
      if(mUseEnergy) {
         TestEnergyUpdate(erg, WindowSizeInt, buffer[i], buffer[i+WindowSizeInt+1]);
         tests += (int)(erg > mThresholdEnergy);
         testThreshold++;
      }

      if(mUseSignChangesLow || mUseSignChangesHigh) {
         //A single update, even when both tests are enabled, so that sc is not shifted twice
         TestSignChangesUpdate(sc, WindowSizeInt, buffer[i], buffer[i+1], buffer[i+WindowSizeInt], buffer[i+WindowSizeInt+1]);
      }
      if(mUseSignChangesLow) {
         tests += (int)(sc < mThresholdSignChangesLower);
         testThreshold++;
      }
      if(mUseSignChangesHigh) {
         tests += (int)(sc > mThresholdSignChangesUpper);
         testThreshold++;
      }

      if(mUseDirectionChangesLow || mUseDirectionChangesHigh) {
         TestDirectionChangesUpdate(dc, WindowSizeInt, atrend, buffer[i], buffer[i+1], ztrend, buffer[i+WindowSizeInt], buffer[i+WindowSizeInt+1]);
      }
      if(mUseDirectionChangesLow) {
         tests += (int)(dc < mThresholdDirectionChangesLower);
         testThreshold++;
      }
      if(mUseDirectionChangesHigh) {
         tests += (int)(dc > mThresholdDirectionChangesUpper);
         testThreshold++;
      }

      if(tests >= testThreshold) {
         //Finish off on the first hit
         break;
      }
   }

   delete [] buffer;

   //i+lastsubthresholdsample is the best guess for where the word starts
   return i + lastsubthresholdsample;
}

//Move backward from end to find an ON region.
// Scans backward from `end` for the point where the signal switches ON.
//
//   t   - track the samples are read from
//   end - sample position at the end of the selection
//   len - number of samples in the selection
//
// Returns the estimated position of the ON point, or `end` when the
// selection is too short or no long-enough run of above-threshold blocks
// was found.
sampleCount VoiceKey::OnBackward (WaveTrack & t, sampleCount end, sampleCount len)
{
   //Change the second-based parameters into sample-based parameters
   double rate = t.GetRate();                                         //Translates seconds to samples
   unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize);   //Size of window to examine

   //The block scan needs one window plus 10 samples. The check is made in
   //samples: comparing mWindowSize (seconds) against len never fired.
   if(len < (sampleCount)WindowSizeInt + 10) {
      wxMessageBox(_("Selection is too small to use voice key."));
      return end;
   }

   sampleCount lastsubthresholdsample = end;   //end of the last block that did not exceed the threshold
   int samplesleft = len - WindowSizeInt;      //number of samples remaining in the selection
   unsigned int i;                             //position in the track, later index into the local buffer
   int blockruns = 0;                          //number of consecutive above-threshold blocks
   int blocksize;                              //the final block may be smaller than WindowSizeInt

   //Phase 1: go through the selection a block at a time in reverse order. If a
   //long enough run of above-threshold blocks occurs, stop and refine from the
   //last sub-threshold block. If fewer than 10 samples are left over, don't bother.
   //NOTE(review): the run length is compared against mSilentWindowSize here,
   //while OnForward uses mSignalWindowSize -- confirm which is intended.
   for(i = end - WindowSizeInt; samplesleft >= 10; i -= (WindowSizeInt - 1), samplesleft -= (WindowSizeInt - 1)) {

      if(samplesleft < (int)WindowSizeInt)
         blocksize = samplesleft;
      else
         blocksize = WindowSizeInt;

      if(AboveThreshold(t, i, blocksize)) {
         blockruns++;                                    //Hit
      }
      else {
         blockruns = 0;                                  //Miss--start over
         lastsubthresholdsample = i + WindowSizeInt;
      }

      //If the blockrun is long enough, break out of the loop early:
      if(blockruns > mSilentWindowSize/mWindowSize)
         break;
   }

   //If we did not break out early (samplesleft > 10), nothing was found
   if(samplesleft <= 10)
      return end;

   //Phase 2: look more carefully, one sample at a time, moving backward from
   //lastsubthresholdsample. One window is scanned, and a second window is
   //needed to draw incoming samples from.
   samplesleft = 2*WindowSizeInt+1;

   //Use a local buffer to avoid a costly t.Get() per sample. It covers the
   //samplesleft samples that end at lastsubthresholdsample.
   sampleFormat *buffer = new sampleFormat[samplesleft];
   t.Get((samplePtr)buffer, floatSample, lastsubthresholdsample-samplesleft, samplesleft);

   //atrend and ztrend keep track of the trends at the two edges of the
   //evaluation window, read in the (backward) scan direction.
   //The second operand of ztrend used to be the same element as the first,
   //which made ztrend always 0.
   int atrend = sgn(buffer[samplesleft - 2]-buffer[samplesleft - 1]);
   int ztrend = sgn(buffer[samplesleft - WindowSizeInt-2]-buffer[samplesleft - WindowSizeInt-1]);

   double erg = 0;
   double sc = 0;
   double dc = 0;

   //Get initial test statistic values from the window that ends at
   //lastsubthresholdsample, i.e. the tail of the buffered region.
   if(mUseEnergy)
      erg = TestEnergy(t, lastsubthresholdsample - WindowSizeInt, WindowSizeInt);
   if(mUseSignChangesLow || mUseSignChangesHigh)
      sc = TestSignChanges(t, lastsubthresholdsample - WindowSizeInt, WindowSizeInt);
   if(mUseDirectionChangesLow || mUseDirectionChangesHigh)
      dc = TestDirectionChanges(t, lastsubthresholdsample - WindowSizeInt, WindowSizeInt);

   //Now, go through the sound again, sample by sample, from the end of the
   //buffer toward its start. The sample leaving the window is buffer[i]; the
   //one entering is buffer[i-WindowSizeInt-1], which reaches index 0 on the
   //last iteration, so every access stays inside the buffer.
   for(i = samplesleft-1; i > WindowSizeInt; i--) {
      int tests = 0;
      int testThreshold = 0;

      //Update each statistic once per step, then apply the enabled tests
      if(mUseEnergy) {
         TestEnergyUpdate(erg, WindowSizeInt, buffer[i], buffer[i-WindowSizeInt-1]);
         tests += (int)(erg > mThresholdEnergy);
         testThreshold++;
      }

      if(mUseSignChangesLow || mUseSignChangesHigh) {
         //A single update, even when both tests are enabled, so that sc is not shifted twice
         TestSignChangesUpdate(sc, WindowSizeInt, buffer[i], buffer[i-1], buffer[i-WindowSizeInt], buffer[i-WindowSizeInt-1]);
      }
      if(mUseSignChangesLow) {
         tests += (int)(sc < mThresholdSignChangesLower);
         testThreshold++;
      }
      if(mUseSignChangesHigh) {
         tests += (int)(sc > mThresholdSignChangesUpper);
         testThreshold++;
      }

      if(mUseDirectionChangesLow || mUseDirectionChangesHigh) {
         TestDirectionChangesUpdate(dc, WindowSizeInt, atrend, buffer[i], buffer[i-1], ztrend, buffer[i-WindowSizeInt], buffer[i-WindowSizeInt-1]);
      }
      if(mUseDirectionChangesLow) {
         tests += (int)(dc < mThresholdDirectionChangesLower);
         testThreshold++;
      }
      if(mUseDirectionChangesHigh) {
         tests += (int)(dc > mThresholdDirectionChangesUpper);
         testThreshold++;
      }

      if(tests >= testThreshold) {
         //Finish off on the first hit
         break;
      }
   }

   delete [] buffer;

   //The buffer starts at lastsubthresholdsample - samplesleft, so this maps
   //the buffer index i back to a track position.
   return lastsubthresholdsample - samplesleft + i;
}

//Move forward from the start to find an OFF region.
sampleCount VoiceKey::OffForward (WaveTrack & t, sampleCount start, sampleCount len) { if((mWindowSize) >= len+10){ wxMessageBox(_("Selection is too small to use voice key.")); return start; } else{ sampleCount lastsubthresholdsample; // keeps track of the sample number of the last sample to not exceed the threshold //Change the millisecond-based parameters into sample-based parameters double rate = t.GetRate(); //Translates seconds to samples unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key int samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection lastsubthresholdsample = start; //start this off at the selection start unsigned int i; //iterates through waveblock int blockruns=0; //keeps track of the number of consecutive above-threshold blocks int blocksize; //The final block may be smaller than WindowSizeInt, so use this //This loop goes through the selection a block at a time. If a long enough run //of above-threshold blocks occur, we return to the last sub-threshold block and //go through one sample at a time. //If there are fewer than 10 samples leftover, don't bother. 
for(i = start; samplesleft >=10; i+=(WindowSizeInt-1) , samplesleft -= (WindowSizeInt -1)){ //Set blocksize so that it is the right size if(samplesleft < (int)WindowSizeInt){ blocksize = samplesleft; } else{ blocksize = WindowSizeInt; } if(!AboveThreshold(t,i,blocksize)) { blockruns++; //Hit } else { blockruns=0; //Above threshold--start over lastsubthresholdsample = i; } //If the blockrun is long enough, break out of the loop early: if(blockruns > mSilentWindowSize/mWindowSize) break; } //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully if(samplesleft > 10){ //Calculate how many to scan through--we only have to go through (at most) //the first window + 1 samples--but we need another window samples to draw from. samplesleft = 2*WindowSizeInt+1; //To speed things up, create a local buffer to store things in, to avoid the costly t.Get(); //Only go through the first SilentWindowSizeInt samples, and choose the first that trips the key. sampleFormat *buffer = new sampleFormat[samplesleft]; t.Get((samplePtr)buffer, floatSample, lastsubthresholdsample,samplesleft); //Initialize these trend markers atrend and ztrend. They keep track of the //up/down trends at the start and end of the evaluation window. int atrend = sgn(buffer[1]-buffer[0]); int ztrend = sgn(buffer[WindowSizeInt+1]-buffer[WindowSizeInt]); double erg=0; double sc=0; double dc=0; //Get initial test statistic values. if(mUseEnergy) erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt); if(mUseSignChangesLow || mUseSignChangesHigh) sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt); if(mUseDirectionChangesLow || mUseDirectionChangesHigh) dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt); //Now, go through the sound again, sample by sample. 
for(i=0; imThresholdEnergy); testThreshold++; } if(mUseSignChangesLow) { TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]); tests += (int)(sc < mThresholdSignChangesLower); testThreshold++; } if(mUseSignChangesHigh) { TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]); tests += (int)(sc > mThresholdSignChangesUpper); testThreshold++; } if(mUseDirectionChangesLow) { TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]); tests += (int)(dc < mThresholdDirectionChangesLower); testThreshold++; } if(mUseDirectionChangesHigh) { TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]); tests += (int)(dc > mThresholdDirectionChangesUpper); testThreshold++; } if(tests < testThreshold) { //Finish off on the first below-threshold block break; } } //When we get here, i+lastsubthresholdsample is the best guess for where the word starts delete [] buffer; return i+lastsubthresholdsample; } else{ //If we failed to find anything, return the start position return start ; } } } //Move backward from the end to find an OFF region sampleCount VoiceKey::OffBackward (WaveTrack & t, sampleCount end, sampleCount len) { if((mWindowSize) >= len+10){ wxMessageBox(_("Selection is too small to use voice key.")); return end; } else{ sampleCount lastsubthresholdsample; // keeps track of the sample number of the last sample to not exceed the threshold //Change the millisecond-based parameters into sample-based parameters double rate = t.GetRate(); //Translates seconds to samples unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine //unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key int samplesleft = len - WindowSizeInt; 
//Indexes the number of samples remaining in the selection lastsubthresholdsample = end; //start this off at the end unsigned int i; //iterates through waveblock int blockruns=0; //keeps track of the number of consecutive above-threshold blocks int blocksize; //The final block may be smaller than WindowSizeInt, so use this //This loop goes through the selection a block at a time in reverse order. If a long enough run //of above-threshold blocks occur, we return to the last sub-threshold block and //go through one sample at a time. //If there are fewer than 10 samples leftover, don't bother. for(i = end - WindowSizeInt; samplesleft >=10; i-=(WindowSizeInt-1) , samplesleft -= (WindowSizeInt -1)){ //Set blocksize so that it is the right size if(samplesleft < (int)WindowSizeInt){ blocksize = samplesleft; } else{ blocksize = WindowSizeInt; } if(!AboveThreshold(t,i,blocksize)) { blockruns++; //Hit } else { blockruns=0; //Miss--start over lastsubthresholdsample = i+WindowSizeInt; } //If the blockrun is long enough, break out of the loop early: if(blockruns > mSilentWindowSize/mWindowSize) break; } //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully if(samplesleft > 10){ //Calculate how many to scan through--we only have to go through (at most) //the first window + 1 samples--but we need another window samples to draw from. samplesleft = 2*WindowSizeInt+1; //To speed things up, create a local buffer to store things in, to avoid the costly t.Get(); //Only go through the first SilentWindowSizeInt samples, and choose the first that trips the key. sampleFormat *buffer = new sampleFormat[samplesleft]; t.Get((samplePtr)buffer, floatSample, lastsubthresholdsample-samplesleft,samplesleft); //Initialize these trend markers atrend and ztrend. They keep track of the //up/down trends at the start and end of the evaluation window. 
int atrend = sgn(buffer[samplesleft - 2]-buffer[samplesleft - 1]); int ztrend = sgn(buffer[samplesleft - WindowSizeInt-2]-buffer[samplesleft - WindowSizeInt-2]); double erg=0; double sc=0; double dc=0; //Get initial test statistic values. if(mUseEnergy) erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt); if(mUseSignChangesLow || mUseSignChangesHigh) sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt); if(mUseDirectionChangesLow || mUseDirectionChangesHigh) dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt); //Now, go through the sound again, sample by sample. for(i=samplesleft-1; i>WindowSizeInt; i--){ int tests = 0; int testThreshold = 0; //Update the test statistics if(mUseEnergy) { TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]); tests += (int)(erg>mThresholdEnergy); testThreshold++; } if(mUseSignChangesLow) { TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]); tests += (int)(sc < mThresholdSignChangesLower); testThreshold++; } if(mUseSignChangesHigh) { TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]); tests += (int)(sc > mThresholdSignChangesUpper); testThreshold++; } if(mUseDirectionChangesLow) { TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]); tests += (int)(dc < mThresholdDirectionChangesLower); testThreshold++; } if(mUseDirectionChangesHigh) { TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]); tests += (int)(dc > mThresholdDirectionChangesUpper); testThreshold++; } if(tests < testThreshold) { //Finish off on the first hit break; } } //When we get here, i+lastsubthresholdsample is the best guess for where the word starts delete [] buffer; return lastsubthresholdsample - samplesleft + i; } else{ //If we failed to find 
anything, return the start position return end ; } } } //This tests whether a specified block region is above or below threshold. bool VoiceKey::AboveThreshold(WaveTrack & t, sampleCount start, sampleCount len) { double erg=0; double sc=0; double dc=0; //These store three statistics: energy, signchanges, and directionchanges int tests =0; //Keeps track of how many statistics surpass the threshold. int testThreshold=0; //Keeps track of the threshold. //Calculate the test statistics if(mUseEnergy) { testThreshold++; erg = TestEnergy(t, start,len); tests +=(int)(erg > mThresholdEnergy); #if 0 std::cout << "Energy: " << erg << " " < mThresholdSignChangesUpper); #if 0 std::cout << "SignChanges: " << sc << " " < mThresholdDirectionChangesUpper); #if 0 std::cout << "DirectionChanges: " << dc << " " <= testThreshold); } //This adjusts the threshold. Larger values of t expand the noise region, //making more things be classified as noise (and requiring a stronger signal). void VoiceKey::AdjustThreshold(double t){ mThresholdAdjustment = t; mThresholdEnergy = mEnergyMean + mEnergySD * t; mThresholdSignChangesUpper = mSignChangesMean + mSignChangesSD * t; mThresholdSignChangesLower = mSignChangesMean - mSignChangesSD * t; mThresholdDirectionChangesUpper = mDirectionChangesMean + mDirectionChangesSD * t; mThresholdDirectionChangesLower = mDirectionChangesMean - mDirectionChangesSD * t; }; //This 'calibrates' the voicekey to noise void VoiceKey::CalibrateNoise(WaveTrack & t, sampleCount start, sampleCount len){ //To calibrate the noise, we need to scan the sample block just like in the voicekey and //calculate the mean and standard deviation of the test statistics. 
//Then, we set the BaselineThreshold to be one wxBusyCursor busy; //initialize some sample statistics: sums of X and X^2 double sumerg, sumerg2; double sumsc, sumsc2; double sumdc, sumdc2; double erg, sc, dc; // sampleFormat a1=0; // sampleFormat a2=0; // sampleFormat z1=0; // sampleFormat z2=0; // keeps track of initial and final samples of window to enable updating formulae int atrend=0; // equals sgn(a2-a1); Keeps track of trend at start of window int ztrend=0; // equals sgn(z2-z1); Keeps track of trend at end of window //Now, change the millisecond-based parameters into sample-based parameters //(This depends on WaveTrack t) double rate = t.GetRate(); unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); // unsigned int SignalWindowSizeInt = (unsigned int)(rate * mSignalWindowSize); //Get the first test statistics //Calibrate all of the statistic, because they might be //changed later. // if(mUseEnergy) erg = TestEnergy(t, start, WindowSizeInt); // if(mUseSignChanges) sc = TestSignChanges(t,start, WindowSizeInt); // if(mUseDirectionChanges) dc = TestDirectionChanges(t,start,WindowSizeInt); //Calculate initial values for the trend trackers--------------------// //Get((samplePtr)a1, floatSample, start, 1); // t.Get((samplePtr)a2, floatSample, start+1, 1); // t.Get((samplePtr)z1, floatSample, start+WindowSizeInt-1, 1); // t.Get((samplePtr)z2, floatSample, start+WindowSizeInt, 1); atrend = -1;//sgn(a2-a1); ztrend = -1;// sgn(z2-z1); //-------------------------------------------------------------------// sumerg =0.0; sumerg2 = 0.0; sumsc =0.0; sumsc2 = 0.0; sumdc =0.0; sumdc2 =0.0; // int n = len - WindowSizeInt; //This is how many samples we have int samplesleft = len - WindowSizeInt; int i; int blocksize; int samples=0; for(i = start; samplesleft >=10 ; i += (WindowSizeInt -1), samplesleft -= (WindowSizeInt -1) ) { //Take samples chunk-by-chunk. 
//Normally, this should be in WindowSizeInt chunks, but at the end (if there are more than 10 //samples left) take a chunk that eats the rest of the samples. samples++; //Increment the number of samples we have if(samplesleft < (int)WindowSizeInt) { blocksize = samplesleft; } else { blocksize = WindowSizeInt; } erg = TestEnergy(t, i, blocksize); sumerg +=(double)erg; sumerg2 += pow((double)erg,2); sc = TestSignChanges(t,i, blocksize); sumsc += (double)sc; sumsc2 += pow((double)sc,2); dc = TestDirectionChanges(t,i,blocksize); sumdc += (double)dc; sumdc2 += pow((double)dc,2); } mEnergyMean = sumerg / samples; mEnergySD = sqrt(sumerg2/samples - mEnergyMean*mEnergyMean); mSignChangesMean = sumsc / samples; mSignChangesSD = sqrt(sumsc2 / samples - mSignChangesMean * mSignChangesMean); mDirectionChangesMean = sumdc / samples; mDirectionChangesSD =sqrt(sumdc2 / samples - mDirectionChangesMean * mDirectionChangesMean) ; wxString text = wxString::Format(_("Calibration Results\n")); text += wxString::Format(_("Energy -- mean: %1.4f sd: (%1.4f)\n"),mEnergyMean,mEnergySD); text+= wxString::Format(_("Sign Changes -- mean: %1.4f sd: (%1.4f)\n"),mSignChangesMean,mSignChangesSD); text+= wxString::Format(_("Direction Changes -- mean: %1.4f sd: (%1.4f)\n"),mDirectionChangesMean,mDirectionChangesSD); wxMessageDialog * stats = new wxMessageDialog(NULL, text, wxT("Calibration Complete"), wxOK | wxICON_INFORMATION, wxPoint(-1,-1)); stats->ShowModal(); delete stats; AdjustThreshold(mThresholdAdjustment); } void VoiceKey::SetKeyType(bool erg, bool scLow , bool scHigh, bool dcLow, bool dcHigh) { mUseEnergy = erg; mUseSignChangesLow = scLow; mUseSignChangesHigh = scHigh; mUseDirectionChangesLow = dcLow; mUseDirectionChangesHigh = dcHigh; } //This might continue over a number of blocks. 
double VoiceKey::TestEnergy (WaveTrack & t, sampleCount start, sampleCount len) { double sum = 1; sampleCount s = start; //Keep track of start sampleCount originalLen = len; //Keep track of the length of block to process (its not the length of t) sampleCount blockSize = t.GetMaxBlockSize(); //Determine size of sampling buffer if( blockSize > len) blockSize = len; float *buffer = new float[blockSize]; //Get a sampling buffer while(len > 0) { sampleCount block = t.GetBestBlockSize(s); //Figure out how much to grab if(block > len) block = len; //Don't grab too much! t.Get((samplePtr)buffer,floatSample, s,block); //grab the block; //Now, go through the block and calculate energy for(int i = 0; i< block; i++) { sum += buffer[i]*buffer[i]; } len -= block; s += block; } delete [] buffer; return sum / originalLen; } //This will update RMSE by adding one element and subtracting another void VoiceKey::TestEnergyUpdate (double & prevErg, int len, const sampleFormat & drop, const sampleFormat & add) { //This is an updating formula for RMSE. It will only recalculate what's changed. prevErg = prevErg + (double)(abs(add) - abs(drop))/len; } double VoiceKey::TestSignChanges(WaveTrack & t, sampleCount start, sampleCount len) { sampleCount s = start; //Keep track of start sampleCount originalLen = len; //Keep track of the length of block to process (its not the length of t) sampleCount blockSize = t.GetMaxBlockSize(); //Determine size of sampling buffer unsigned long signchanges = 1; int currentsign=0; if( blockSize > len) blockSize = len; sampleFormat *buffer = new sampleFormat[blockSize]; //Get a sampling buffer while(len > 0) { sampleCount block = t.GetBestBlockSize(s); //Figure out how much to grab if(block > len) block = len; //Don't grab too much! t.Get((samplePtr)buffer,floatSample, s,block); //grab the block; if (len == originalLen) { //The first time through, set stuff up special. 
currentsign = sgn(buffer[0]); } //Now, go through the block and calculate zero crossings for(int i = 0; i< block; i++) { if( sgn(buffer[i]) != currentsign) { currentsign = sgn(buffer[i]); signchanges++; } } len -= block; s += block; } delete [] buffer; return (double)signchanges/originalLen; } void VoiceKey::TestSignChangesUpdate(double & currentsignchanges, int len, const sampleFormat & a1, const sampleFormat & a2, const sampleFormat & z1, const sampleFormat & z2) { if(sgn(a1)!=sgn(a2)) currentsignchanges -= 1.0/len; if(sgn(z1)!=sgn(z2)) currentsignchanges += 1.0/len; } double VoiceKey::TestDirectionChanges(WaveTrack & t, sampleCount start, sampleCount len) { sampleCount s = start; //Keep track of start sampleCount originalLen = len; //Keep track of the length of block to process (its not the length of t) sampleCount blockSize = t.GetMaxBlockSize(); //Determine size of sampling buffer unsigned long directionchanges = 1; sampleFormat lastval=sampleFormat(0); int lastdirection=1; if( blockSize > len) blockSize = len; sampleFormat *buffer = new sampleFormat[blockSize]; //Get a sampling buffer while(len > 0) { sampleCount block = t.GetBestBlockSize(s); //Figure out how much to grab if(block > len) block = len; //Don't grab too much! t.Get((samplePtr)buffer,floatSample, s,block); //grab the block; if (len == originalLen) { //The first time through, set stuff up special. lastval = buffer[0]; } //Now, go through the block and calculate zero crossings for(int i = 0; i< block; i++){ if( sgn(buffer[i]-lastval) != lastdirection) { directionchanges++; lastdirection = sgn(buffer[i] - lastval); } lastval = buffer[i]; } len -= block; s += block; } delete [] buffer; return (double)directionchanges/originalLen; } // This method does an updating by looking at the trends // This will change currentdirections and atrend/trend, so be warned. 
// Incrementally updates a direction-change value when the window slides by
// one sample. (a1, a2) is the pair leaving the window and (z1, z2) the pair
// entering it; atrend and ztrend carry the last trend seen at each edge and
// are overwritten whenever the trend there changes. Each change is worth
// 1/len.
void VoiceKey::TestDirectionChangesUpdate(double & currentdirectionchanges, int len,
                                          int & atrend, const sampleFormat & a1, const sampleFormat & a2,
                                          int & ztrend, const sampleFormat & z1, const sampleFormat & z2)
{
   const double weight = 1.0/len;

   //The direction shifted for the item we're dropping
   const int leavingTrend = sgn(a2 - a1);
   if(leavingTrend != atrend) {
      currentdirectionchanges -= weight;
      atrend = leavingTrend;
   }

   //The direction shifts when we add an item
   const int enteringTrend = sgn(z2 - z1);
   if(enteringTrend != ztrend) {
      currentdirectionchanges += weight;
      ztrend = enteringTrend;
   }
}

// Indentation settings for Vim and Emacs and unique identifier for Arch, a
// version control system. Please do not modify past this point.
//
// Local Variables:
// c-basic-offset: 3
// indent-tabs-mode: nil
// End:
//
// vim: et sts=3 sw=3
// arch-tag: