/*************************************************************************/ /* */ /* Centre for Speech Technology Research */ /* University of Edinburgh, UK */ /* Copyright (c) 1994,1995,1996 */ /* All Rights Reserved. */ /* */ /* Permission is hereby granted, free of charge, to use and distribute */ /* this software and its documentation without restriction, including */ /* without limitation the rights to use, copy, modify, merge, publish, */ /* distribute, sublicense, and/or sell copies of this work, and to */ /* permit persons to whom this work is furnished to do so, subject to */ /* the following conditions: */ /* 1. The code must retain the above copyright notice, this list of */ /* conditions and the following disclaimer. */ /* 2. Any modifications must be clearly marked as such. */ /* 3. Original authors' names are not deleted. */ /* 4. The authors' names are not used to endorse or promote products */ /* derived from this software without specific prior written */ /* permission. */ /* */ /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */ /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */ /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ /* THIS SOFTWARE. */ /* */ /*************************************************************************/ /* Author : Paul Taylor */ /* Date : December 96 */ /*-----------------------------------------------------------------------*/ /* Spectrogram Generation */ /* */ /*=======================================================================*/ #include #include #include /* needed for FLT_MAX */ #include "EST_error.h" #include "EST_Track.h" #include "EST_Wave.h" #include "sigpr/EST_Window.h" #include "EST_Option.h" #include "sigpr/EST_fft.h" #include "sigpr/EST_spectrogram.h" #include "sigpr/EST_misc_sigpr.h" void make_spectrogram(EST_Wave &sig, EST_Track &sp, EST_Features &op) { EST_Wave psig; EST_pre_emphasis(sig, psig, op.F("preemph")); // calculate raw spectrogram raw_spectrogram(sp, psig, op.F("frame_length"), op.F("frame_shift"), op.I("frame_order"), op.present("slow_fft")); if (op.present("raw")) { cout << "no scaling\n"; return; } // coerce the values so as to emphasis important features if (op.present("sp_range") || op.present("sp_wcut") || op.present("sp_bcut")) { if (!op.present("sp_range")) op.set("sp_range", 1.0); if (!op.present("sp_wcut")) op.set("sp_wcut", 1.0); if (!op.present("sp_bcut")) op.set("sp_bcut", 0.0); scale_spectrogram(sp, op.F("sp_range"),op.F("sp_wcut"),op.F("sp_bcut")); } } void scale_spectrogram(EST_Track &sp, float range, float wcut, float bcut) { float max, min, scale, v; int i, j; max = -FLT_MIN; min = FLT_MAX; // find min and max values for (i = 0; i < sp.num_frames(); ++i) for (j = 0; j < sp.num_channels(); ++j) { float vv = sp.a_no_check(i, j); if (vv > max) max = vv; if (vv < min) min = vv; } scale = (max - min); // for every value: // 1. Effectively scale in range 0 to 1 // 2. Impose white and black cut offs // 3. Rescale to 0 and 1 // 4. scale to fit in "range" // this can obviously be done more efficiently float mag = (float)range / (float)(bcut - wcut); for (i = 0; i < sp.num_frames(); ++i) for (j = 0; j < sp.num_channels(); ++j) { v = (((sp.a_no_check(i, j) - min) / scale) - wcut) * mag; if (v > range) v = range; if (v < 0.0) v = 0.0; sp.a_no_check(i, j) = v; } } void raw_spectrogram(EST_Track &sp, EST_Wave &sig, float length, float shift, int order, bool slow) { int frame_length = (int) (length * (float) sig.sample_rate() +0.5); int frame_shift = (int) (shift * (float) sig.sample_rate() +0.5); EST_WindowFunc *make_window = EST_Window::creator("hamming"); // sanity check, we can't analyse more signal than order allows. if (frame_length > order) { EST_warning("frame_length reduced to %f (%d samples) to fit order\n", (float)order/(float) sig.sample_rate(), order); frame_length=order; } // enough frames to cover the entire signal int num_frames= (int)ceil(sig.num_samples()/(float)frame_shift); // spectrogram gets order/2 powers, the moduli of order/2 // complex numbers sp.resize(num_frames, order/2, FALSE); EST_FVector real(order); EST_FVector imag(order); // create the window shape EST_TBuffer window_vals(frame_length); make_window(frame_length, window_vals,-1); for (int k = 0 ; k < num_frames ; k++) { int pos = frame_shift * k; int window_start = pos - frame_length/2; real.empty(); // imag not used in old FFT code if (slow) imag.empty(); EST_Window::window_signal(sig, window_vals, window_start, frame_length, real, FALSE); int state = slow?power_spectrum_slow(real, imag):power_spectrum(real, imag); if (state != 0) { fprintf(stderr, "FFT Failed for frame %d\n", k); for (int i = 0; i < order /2; ++i) sp.a_no_check(k, i) = 0; } else sp.copy_frame_in(k, real); } sp.fill_time(shift); }