--- orig/nyquist/nyqsrc/convolve.c 2015-05-04 12:41:01.497976900 -0500 +++ nyquist/nyqsrc/convolve.c 2015-05-04 12:40:32.047737200 -0500 @@ -6,34 +6,6 @@ * of the first parameter. */ -/* Original convolve.c modified to do fast convolution. Here are some - * notes: - * The first arg is arbitrary length. The second arg is the impulse - * response, which is converted into a table. Tables have limited maximum - * size, which is good because we're going to use a single FFT for the - * whole impulse response. - * - * The fast convolution works like this: - * inputs are x_snd and h_snd. - * Make h_snd into a table ht of size N, where N is a power of 2. - * Copy ht with zero fill into H of size 2N. - * Compute FFT of H in place. - * Iterate: - * Copy N samples of x_snd into X and zero fill to size 2N. - * Compute FFT of X in place. - * Multiply X by H (result goes into X). - * Compute IFFT of X in place - * Add X to R. - * Now N samples of R can be output. - * Copy 2nd half of R to first half and zero the 2nd half. - * (this is actually done first, and the first time does - * nothing because R is initially filled with zeros) - * - * Length of output is length of x input + length of h - */ - -#define _USE_MATH_DEFINES 1 /* for Visual C++ to get M_LN2 */ -#include #include "stdio.h" #ifndef mips #include "stdlib.h" @@ -43,8 +15,6 @@ #include "falloc.h" #include "cext.h" -#include "fftlib.h" -#include "fftext.h" #include "convolve.h" void convolve_free(); @@ -58,13 +28,13 @@ long x_snd_cnt; sample_block_values_type x_snd_ptr; - sample_type *H; // the FFT of h_snd - int h_len; // true length of H - int N; // length of block, FFTs are of size 2*N - int M; // log2 of 2*N, the FFT size - sample_type *X; - sample_type *R; // result buffer where output is summed - sample_type *R_current; + table_type table; + sample_type *h_buf; + double length_of_h; + long h_len; + long x_buf_len; + sample_type *x_buffer_pointer; + sample_type *x_buffer_current; } convolve_susp_node, *convolve_susp_type; @@ -82,9 +52,8 @@ } -void convolve_s_fetch(snd_susp_type a_susp, snd_list_type snd_list) +void convolve_s_fetch(register convolve_susp_type susp, snd_list_type snd_list) { - convolve_susp_type susp = (convolve_susp_type) a_susp; int cnt = 0; /* how many samples computed */ int togo; int n; @@ -93,9 +62,13 @@ register sample_block_values_type out_ptr_reg; - sample_type *R = susp->R; - sample_type *R_current; - int N = susp->N; + register sample_type * h_buf_reg; + register long h_len_reg; + register long x_buf_len_reg; + register sample_type * x_buffer_pointer_reg; + register sample_type * x_buffer_current_reg; + register sample_type x_snd_scale_reg = susp->x_snd->scale; + register sample_block_values_type x_snd_ptr_reg; falloc_sample_block(out, "convolve_s_fetch"); out_ptr = out->samples; snd_list->block = out; @@ -104,60 +77,43 @@ /* first compute how many samples to generate in inner loop: */ /* don't overflow the output sample block: */ togo = max_sample_block_len - cnt; - /* if we need output samples, generate them here */ - if (susp->R_current >= R + N) { - /* Copy N samples of x_snd into X and zero fill to size 2N */ - int i = 0; - sample_type *X = susp->X; - sample_type *H = susp->H; - int to_copy; - while (i < N) { + + /* don't run past the x_snd input sample block: */ + /* based on susp_check_term_log_samples, but offset by h_len */ + + /* THIS IS EXPANDED BELOW + * susp_check_term_log_samples(x_snd, x_snd_ptr, x_snd_cnt); + */ if (susp->x_snd_cnt == 0) { susp_get_samples(x_snd, x_snd_ptr, x_snd_cnt); + + /* THIS IS EXPANDED BELOW + *logical_stop_test(x_snd, susp->x_snd_cnt); + */ if (susp->x_snd->logical_stop_cnt == susp->x_snd->current - susp->x_snd_cnt) { min_cnt(&susp->susp.log_stop_cnt, susp->x_snd, (snd_susp_type) susp, susp->x_snd_cnt); } - } + + /* THIS IS EXPANDED BELOW + * terminate_test(x_snd_ptr, x_snd, susp->x_snd_cnt); + */ if (susp->x_snd_ptr == zero_block->samples) { + /* ### modify this to terminate at an offset of (susp->h_len) */ + /* Note: in the min_cnt function, susp->x_snd_cnt is *subtracted* + * from susp->x_snd->current to form the terminate time, so to + * increase the time, we need to *subtract* susp->h_len, which + * due to the double negative, *adds* susp->h_len to the ultimate + * terminate time calculation. + */ min_cnt(&susp->terminate_cnt, susp->x_snd, - (snd_susp_type) susp, susp->x_snd_cnt); - /* extend the output to include impulse response */ - susp->terminate_cnt += susp->h_len; + (snd_susp_type) susp, susp->x_snd_cnt - susp->h_len); } - /* copy no more than the remaining space and no more than - * the amount remaining in the block - */ - to_copy = min(N - i, susp->x_snd_cnt); - memcpy(X + i, susp->x_snd_ptr, - to_copy * sizeof(*susp->x_snd_ptr)); - susp->x_snd_ptr += to_copy; - susp->x_snd_cnt -= to_copy; - i += to_copy; - } - /* zero fill to size 2N */ - memset(X + N, 0, N * sizeof(X[0])); - /* Compute FFT of X in place */ - fftInit(susp->M); - rffts(X, susp->M, 1); - /* Multiply X by H (result goes into X) */ - rspectprod(X, H, X, N * 2); - /* Compute IFFT of X in place */ - riffts(X, susp->M, 1); - /* Shift R, zero fill, add X, all in one loop */ - for (i = 0; i < N; i++) { - R[i] = R[i + N] + X[i]; - R[i + N] = X[i + N]; - } - /* now N samples of R can be output */ - susp->R_current = R; - } - /* compute togo, the number of samples to "compute" */ - /* can't use more than what's left in R. R_current is - the next sample of R, so what's left is N - (R - R_current) */ - R_current = susp->R_current; - togo = min(togo, N - (R_current - R)); + } + + + togo = min(togo, susp->x_snd_cnt); /* don't run past terminate time */ if (susp->terminate_cnt != UNKNOWN && @@ -166,23 +122,69 @@ if (togo == 0) break; } + /* don't run past logical stop time */ - if (!susp->logically_stopped && - susp->susp.log_stop_cnt != UNKNOWN && - susp->susp.log_stop_cnt <= susp->susp.current + cnt + togo) { - togo = susp->susp.log_stop_cnt - (susp->susp.current + cnt); - if (togo == 0) break; + if (!susp->logically_stopped && susp->susp.log_stop_cnt != UNKNOWN) { + int to_stop = susp->susp.log_stop_cnt - (susp->susp.current + cnt); + /* break if to_stop == 0 (we're at the logical stop) + * AND cnt > 0 (we're not at the beginning of the + * output block). + */ + if (to_stop < togo) { + if (to_stop == 0) { + if (cnt) { + togo = 0; + break; + } else /* keep togo as is: since cnt == 0, we + * can set the logical stop flag on this + * output block + */ + susp->logically_stopped = true; + } else /* limit togo so we can start a new + * block at the LST + */ + togo = to_stop; + } } n = togo; + h_buf_reg = susp->h_buf; + h_len_reg = susp->h_len; + x_buf_len_reg = susp->x_buf_len; + x_buffer_pointer_reg = susp->x_buffer_pointer; + x_buffer_current_reg = susp->x_buffer_current; + x_snd_ptr_reg = susp->x_snd_ptr; out_ptr_reg = out_ptr; if (n) do { /* the inner sample computation loop */ - *out_ptr_reg++ = (sample_type) *R_current++; + long i; double sum; + /* see if we've reached end of x_buffer */ + if ((x_buffer_pointer_reg + x_buf_len_reg) <= (x_buffer_current_reg + h_len_reg)) { + /* shift x_buffer from current back to base */ + for (i = 1; i < h_len_reg; i++) { + x_buffer_pointer_reg[i-1] = x_buffer_current_reg[i]; + } + /* this will be incremented back to x_buffer_pointer_reg below */ + x_buffer_current_reg = x_buffer_pointer_reg - 1; + } + + x_buffer_current_reg++; + + x_buffer_current_reg[h_len_reg - 1] = (x_snd_scale_reg * *x_snd_ptr_reg++); + + sum = 0.0; + for (i = 0; i < h_len_reg; i++) { + sum += x_buffer_current_reg[i] * h_buf_reg[i]; + } + + *out_ptr_reg++ = (sample_type) sum; } while (--n); /* inner loop */ - /* using R_current is a bad idea on RS/6000: */ - susp->R_current += togo; + susp->x_buffer_pointer = x_buffer_pointer_reg; + susp->x_buffer_current = x_buffer_current_reg; + /* using x_snd_ptr_reg is a bad idea on RS/6000: */ + susp->x_snd_ptr += togo; out_ptr += togo; + susp_took(x_snd_cnt, togo); cnt += togo; } /* outer loop */ @@ -202,9 +204,10 @@ } /* convolve_s_fetch */ -void convolve_toss_fetch(snd_susp_type a_susp, snd_list_type snd_list) +void convolve_toss_fetch(susp, snd_list) + register convolve_susp_type susp; + snd_list_type snd_list; { - convolve_susp_type susp = (convolve_susp_type) susp; time_type final_time = susp->susp.t0; long n; @@ -219,36 +222,32 @@ susp->x_snd_ptr += n; susp_took(x_snd_cnt, n); susp->susp.fetch = susp->susp.keep_fetch; - (*(susp->susp.fetch))(a_susp, snd_list); + (*(susp->susp.fetch))(susp, snd_list); } -void convolve_mark(snd_susp_type a_susp) +void convolve_mark(convolve_susp_type susp) { - convolve_susp_type susp = (convolve_susp_type) a_susp; sound_xlmark(susp->x_snd); } -void convolve_free(snd_susp_type a_susp) +void convolve_free(convolve_susp_type susp) { - convolve_susp_type susp = (convolve_susp_type) a_susp; - free(susp->R); - free(susp->X); - free(susp->H); - sound_unref(susp->x_snd); + table_unref(susp->table); + free(susp->x_buffer_pointer); sound_unref(susp->x_snd); ffree_generic(susp, sizeof(convolve_susp_node), "convolve_free"); } -void convolve_print_tree(snd_susp_type a_susp, int n) +void convolve_print_tree(convolve_susp_type susp, int n) { - convolve_susp_type susp = (convolve_susp_type) a_susp; indent(n); stdputstr("x_snd:"); sound_print_tree_1(susp->x_snd, n); } + sound_type snd_make_convolve(sound_type x_snd, sound_type h_snd) { register convolve_susp_type susp; @@ -256,38 +255,16 @@ time_type t0 = x_snd->t0; sample_type scale_factor = 1.0F; time_type t0_min = t0; - table_type table; - double log_len; falloc_generic(susp, convolve_susp_node, "snd_make_convolve"); - table = sound_to_table(h_snd); - susp->h_len = table->length; - log_len = log(table->length) / M_LN2; /* compute log-base-2(length) */ - susp->M = (int) log_len; - if (susp->M != log_len) susp->M++; /* round up */ - susp->N = 1 << susp->M; /* size of data blocks */ - susp->M++; /* M = log2(2 * N) */ - susp->H = (sample_type *) calloc(2 * susp->N, sizeof(susp->H[0])); - if (!susp->H) { - xlabort("memory allocation failure in convolve"); - } - memcpy(susp->H, table->samples, sizeof(susp->H[0]) * susp->N); - table_unref(table); /* don't need table now */ - /* remaining N samples are already zero-filled */ - if (fftInit(susp->M)) { - free(susp->H); - xlabort("fft initialization error in convolve"); - } - rffts(susp->H, susp->M, 1); - susp->X = (sample_type *) calloc(2 * susp->N, sizeof(susp->X[0])); - susp->R = (sample_type *) calloc(2 * susp->N, sizeof(susp->R[0])); - if (!susp->X || !susp->R) { - free(susp->H); - if (susp->X) free(susp->X); - if (susp->R) free(susp->R); - xlabort("memory allocation failed in convolve"); - } - susp->R_current = susp->R + susp->N; - susp->susp.fetch = &convolve_s_fetch; + susp->table = sound_to_table(h_snd); + susp->h_buf = susp->table->samples; + susp->length_of_h = susp->table->length; + susp->h_len = (long) susp->length_of_h; + h_reverse(susp->h_buf, susp->h_len); + susp->x_buf_len = 2 * susp->h_len; + susp->x_buffer_pointer = calloc((2 * (susp->h_len)), sizeof(float)); + susp->x_buffer_current = susp->x_buffer_pointer; + susp->susp.fetch = convolve_s_fetch; susp->terminate_cnt = UNKNOWN; /* handle unequal start times, if any */ if (t0 < x_snd->t0) sound_prepend_zeros(x_snd, t0);