/* Copyright (C) 2004 Florian Schmidt This program is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include "convolve.h" #include #include #include int my_posix_memalign(void **memptr, size_t alignment, size_t size) { *memptr = malloc(size); if (*memptr != NULL) return 0; else return errno; } int convolution_init (convolution_t *conv, int number_of_responses, int number_of_response_channels, response_t **responses, unsigned int chunk_length, int split_channels_mode) { int index, index2, index3, index4; float tmp_float; if (conv == 0) return 0; tmp_float = 0; conv->split_channels_mode = split_channels_mode; conv->chunk_length = chunk_length; conv->number_of_responses = number_of_responses; conv->number_of_response_channels = number_of_response_channels; // conv->normalization_factor = sqrt(chunk_length * 2); conv->normalization_factor = chunk_length * 6; // conv->normalization_factor = (chunk_length * 2)*(chunk_length * 2); // allocate FFT buffers // as r2c stores only N/2+1 results, we don't need the padded size for this // conv->fft_complex = (fftwf_complex*)malloc (sizeof(fftwf_complex) * (conv->chunk_length + 1)); my_posix_memalign(&conv->fft_complex, 16, sizeof(fftwf_complex) * (conv->chunk_length + 1)); // this one still needs to be 2 * chunk_length as we zero pad it // conv->fft_real = (float*)malloc (sizeof(float) * 2 * conv->chunk_length); my_posix_memalign(&conv->fft_real, 16, sizeof(float) * 2 * conv->chunk_length); // create fftw plans conv->fft_plan_forward = fftwf_plan_dft_r2c_1d (2 * conv->chunk_length, conv->fft_real, conv->fft_complex, FFTW_MEASURE); conv->fft_plan_backward = fftwf_plan_dft_c2r_1d (2 * conv->chunk_length, conv->fft_complex, conv->fft_real, FFTW_MEASURE); // First Step: chunk, pad, fft and store the responses // allocate array of response pointers conv->fft_responses = (fft_response_t**)malloc (sizeof(fft_response_t*)*conv->number_of_responses); // process input responses to padded, chunked, fft'ed internal // representation conv->max_chunks = 0; for (index = 0; index < conv->number_of_responses; ++index) { // create new response struct conv->fft_responses[index] = (fft_response_t*)malloc (sizeof(fft_response_t)); conv->fft_responses[index]->length = responses[index]->length; // make channel_data array big enough to hold pointers for all channels conv->fft_responses[index]->channel_data = (fftwf_complex**)malloc (sizeof(fftwf_complex*) * conv->number_of_response_channels); // how many chunks do we need for this response? conv->fft_responses[index]->number_of_chunks = (int)ceil((responses[index]->length)/(float)(conv->chunk_length)); // new maximum? if (conv->fft_responses[index]->number_of_chunks > conv->max_chunks) { conv->max_chunks = conv->fft_responses[index]->number_of_chunks; } // allocate per channel data buffer) // no need for double size, as we use r2c/c2r for (index2 = 0; index2 < conv->number_of_response_channels; ++index2) { /* conv->fft_responses[index]->channel_data[index2] = (fftwf_complex*)malloc(sizeof(fftwf_complex) * (conv->chunk_length + 1) * conv->fft_responses[index]->number_of_chunks); */ my_posix_memalign(&conv->fft_responses[index]->channel_data[index2], 16, sizeof(fftwf_complex) * (conv->chunk_length + 1) * conv->fft_responses[index]->number_of_chunks); } // for each channel for (index2 = 0; index2 < conv->number_of_response_channels; ++index2) { // for each chunk for (index3 = 0; index3 < conv->fft_responses[index]->number_of_chunks; ++index3) { // copy original chunk to fft_real for (index4 = 0; index4 < conv->chunk_length; ++index4) { if (index4 + index3 * conv->chunk_length < conv->fft_responses[index]->length) { conv->fft_real[index4] // = responses[index]->channel_data[index2][index4+index3*chunk_length] / conv->normalization_factor; = responses[index]->channel_data[index2][index4+index3*chunk_length]; } else { conv->fft_real[index4] = 0; } // and zero pad the second half of the chunk at the same time conv->fft_real[index4+conv->chunk_length] = 0; } // do tha fft fftwf_execute (conv->fft_plan_forward); // copy fft output to our privat chunk store :) for (index4 = 0; index4 < conv->chunk_length + 1; ++index4) { conv->fft_responses[index]->channel_data[index2][index4+index3 * (conv->chunk_length + 1)][0] // = conv->fft_complex[index4][0] / conv->normalization_factor; = conv->fft_complex[index4][0]; conv->fft_responses[index]->channel_data[index2][index4+index3 * (conv->chunk_length + 1)][1] // = conv->fft_complex[index4][1] / conv->normalization_factor; = conv->fft_complex[index4][1]; } } } } // Second Step: // setup ringbuffers for storing FFT'ed input.. // the chunks are size chunk_length + 1 thus // we need number_of_chunks * (chunk_length + 1) // frames // one for each response, but one for every channel in every response // in split_channel_mode if (conv->split_channels_mode) { // printf("%i %i\n", conv->number_of_responses, conv->number_of_response_channels); conv->input_chunk_ringbuffers = (fftwf_complex**)malloc (sizeof(fftwf_complex*) * conv->number_of_responses * conv->number_of_response_channels); for (index = 0; index < conv->number_of_responses * conv->number_of_response_channels; ++index) { /* conv->input_chunk_ringbuffers[index] = (fftwf_complex*)malloc (sizeof(fftwf_complex) * conv->fft_responses[index]->number_of_chunks * (conv->chunk_length + 1)); */ // need to divide (integer divide) the index by number_of_responses to get // a valid index into the list of responses. my_posix_memalign(&conv->input_chunk_ringbuffers[index], 16, sizeof(fftwf_complex) * conv->fft_responses[index/conv->number_of_response_channels]->number_of_chunks * (conv->chunk_length + 1)); // zero out for (index2 = 0; index2 < (conv->chunk_length + 1) * conv->fft_responses[index/conv->number_of_response_channels]->number_of_chunks; ++index2) { conv->input_chunk_ringbuffers[index][index2][0] = 0; conv->input_chunk_ringbuffers[index][index2][1] = 0; } } } else { conv->input_chunk_ringbuffers = (fftwf_complex**)malloc (sizeof(fftwf_complex*)*conv->number_of_responses); for (index = 0; index < conv->number_of_responses; ++index) { /* conv->input_chunk_ringbuffers[index] = (fftwf_complex*)malloc (sizeof(fftwf_complex) * conv->fft_responses[index]->number_of_chunks * (conv->chunk_length + 1)); */ my_posix_memalign(&conv->input_chunk_ringbuffers[index], 16, sizeof(fftwf_complex) * conv->fft_responses[index]->number_of_chunks * (conv->chunk_length + 1)); // zero out for (index2 = 0; index2 < (conv->chunk_length + 1) * conv->fft_responses[index]->number_of_chunks; ++index2) { conv->input_chunk_ringbuffers[index][index2][0] = 0; conv->input_chunk_ringbuffers[index][index2][1] = 0; } } } // allocate input_chunk_ringbuffer_index arrays conv->input_chunk_ringbuffer_indexes = (int*)malloc (sizeof(int) * conv->number_of_responses); // zero out for (index = 0; index < conv->number_of_responses; ++index) { conv->input_chunk_ringbuffer_indexes[index] = 0; } // Third Step: setup overlap buffers // these need only be chunk_length sized conv->overlap_buffers = (float**)malloc (sizeof(float*) * conv->number_of_response_channels); for (index = 0; index < conv->number_of_response_channels; ++index) { // conv->overlap_buffers[index] = (float*)malloc (sizeof(float) * conv->chunk_length); my_posix_memalign(&conv->overlap_buffers[index], 16, sizeof(float) * conv->chunk_length); // zero out for (index2 = 0; index2 < conv->chunk_length; ++index2) { conv->overlap_buffers[index][index2] = 0; } } return 1; }