DSPRelated.com
Forums

2D Convolution using FFTW

Started by Swati January 15, 2004
Hello everyone,

I am implementing a fast convolution program using FFTW library.  I
understand the constraints of the Discrete Convolution Theorem, and
the issues of wrap around error and periodicity.
 
Hence, I am taking the following approach at the moment.
 
/*-------------------*/
 
Image: image[sizeX][sizeY]
(Symmetrical)Filter: kernel[2k + 1][2k + 1]
 
Step(1) Define padX = sizeX + k; padY = sizeY + k;
 
Step (2) Pad image on two sides with zeros so that its size is padX *
padY.  I pad zeros on the right side and the bottom side of the image
buffer.
 
Step (3) Store kernel in a wrap around fashion with the size padX *
padY.
 
So, if Symmetrical (eg. Gaussian) kernel = [a, b, c
                                            d, e, f
                                            g, h, i] 
 
and padX = 8 and padY = 8
 
newKernel = [a, b, 0, 0, 0, 0, 0, c
             d, e, 0, 0, 0, 0, 0, f
             0, 0, 0, 0, 0, 0, 0, 0
             ....
             ....
             g,h, 0, 0, 0, 0, 0, i]
 
Step (4) Take FFT of newKernel and the image padded with zeros
Step (5) Do pixel by pixel complex multiplication of FFT output.
Step (6) Take inverse FFT of the output
Step (7) Read in the output
Step (8) Remove zero paddings. 
 
/*---------*/
 
 
Well, this only works for small kernel sizes, because when the kernel
size is bigger I get a shift in the blurred image.
 
I tried various other ways but I haven't been successful in resolving
this error.  So, I ask for your help.

The C++ code for the FFT functions is as follows:
(1)


/*-------Building Gaussian FFT--------------------*/

/*----Create Gaussian Kernel---------*/
inline void ConvolutionOld::CreateGaussianKernel()
{
 	float d;
 	int counter;
 	float normalise;
 	int i;
 	counter = 0;
 	int y,x;
 	normalise = 0;
 
 	///////////////////////////////
 	//-2 -1 0 1 2
 	//-2  x
 	//-1  x
 	// 0  x
 	// 1  x
 	// 2  x
 	///////////////////////////////
 	//e ^ -(x*x + y*y)/2 * sigma * sigma
 	////////////////////////////////
 	
 
 	//We only consider cases with k.kernelSide as odd
 
 	for(y = -((k.kernelSide - 1)/2); y <= (k.kernelSide - 1)/2; y++)
 	{	
 		for(x = -((k.kernelSide - 1)/2); x <= (k.kernelSide - 1)/2; x++)
 		{
 			d = (float)(x*x + y*y)/(float)(2.0f * k.sigma * k.sigma);
 			kernel[counter] = pow(e,-d);
 			normalise += kernel[counter];
 			counter++;
 		}
 	}

 
 	for(i = 0; i < k.kernelSide*k.kernelSide; i++)
 		kernel[i] = kernel[i]/normalise;	

	
}
	



inline void ConvolutionOld::ComputeKernelFFT()
{	
	float *kernelRow;
	float *kernelColumn;
	int y,x;

	kernelRow = new float[padX * k.kernelSide];
	kernelColumn = new float[padSize];
        
        /*---------Perform padding with zeros in wrap around
fashion-------*/
	WrapPadKernelRows(kernel, kernelRow);
	WrapPadKernelColumns(kernelRow, kernelColumn);


	kernel_FFT = new zomplex*[padY];
	
	for(y = 0; y < padY; y++)
	{	
		kernel_FFT[y] = new zomplex[padX];
		for(x = 0; x < padX; x++)
		{
			kernel_FFT[y][x].im = 0;
			kernel_FFT[y][x].re = kernelColumn[x + y*padX];
		}
	}
	
	//Compute the FFT of the Gaussian Kernel
	ComputeFFT(kernel_FFT, -1); 	
	

	//Free the kernels that are no longer required
	delete [] kernelRow;
	delete [] kernelColumn;

}


/*--------------Wrap the kernel along rows-----------------*/
inline void ConvolutionOld::WrapPadKernelRows(float *kernel, float
*kernelRow)
{
 int y,x;

 for(y = 0; y < k.kernelSide; y++)
{
  for(x = 0; x <= (k.kernelSide - 1)/2; x++)
    kernelRow[x + y*padX] = kernel[x + y*k.kernelSide];

  for(x = (k.kernelSide - 1)/2 + 1; x < padX - (k.kernelSide - 1)/2;
x++)
    kernelRow[x + y * padX] = 0;

  for(x = 1; x <= (k.kernelSide - 1)/2 ; x++)
    kernelRow[(padX - x) + y*padX] = kernel[(k.kernelSide - x) +
y*k.kernelSide];
	}
}



/*---------------Wrap the kernelRow along columns--------------*/
inline void ConvolutionOld::WrapPadKernelColumns(float *kernel, float
*kernelColumn)
{
 int y,x;

 for(y = 0; y <= (k.kernelSide - 1)/2; y++)
 {
   for(x = 0; x < padX; x++)
	kernelColumn[x + y*padX] = kernel[x + y*padX];
 }

 for(y = (k.kernelSide - 1)/2 + 1; y < padY - (k.kernelSide - 1)/2;
y++)
 {
   for(x = 0; x < padX; x++)
        kernelColumn[x + y*padX] = 0;
 }

 for(y = 1; y <= (k.kernelSide - 1)/2; y++)
 {
   for(x = 0; x < padX; x++)
	kernelColumn[x + (padY - y) * padX] = kernel[x + (k.kernelSide -
y)*padX];
 }
}




(2) /*-----Function to compute FFT using FFTW
library----------------*/


inline void ConvolutionOld::ComputeFFT(zomplex** array, int inv)
{
	fftw_plan p;
	int y,x;
	fftw_complex *carray;
	
	carray = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) *
padSize);

	for(y = 0; y < padY; y++)
	{
		for(x = 0; x < padX; x++)
		{	
			carray[x + y * padX][0] = array[y][x].re;
			carray[x + y * padX][1] = array[y][x].im;
		}
	}

	
	p = fftw_plan_dft_2d(padY, padX, carray, carray, inv, FFTW_ESTIMATE);
	fftw_execute(p);
	
	for(y = 0; y < padY; y++)
	{
		for(x = 0; x < padX; x++)
		{
			array[y][x].re = carray[x + y * padX][0];
			array[y][x].im = carray[x + y * padX][1];
		}
	}
	
	fftw_destroy_plan(p);	
	fftw_free(carray);
}



(3)/*----------Function to compute image FFT-------------*/

/*----------Pad the image with zeros of the right and
bottom--------------*/
inline void ConvolutionOld::ZeroPadImage()
{
	int y,x;
	image_FFT = new zomplex*[padY]; 


	
	for(y = 0; y < sizeY; y++)
	{	
		image_FFT[y] = new zomplex[padX];
		for(x = 0; x < sizeX; x++)
		{
			image_FFT[y][x].re = image[x + y*sizeX];
			image_FFT[y][x].im = 0;
		}

		for(x = sizeX; x < padX; x++)
		{
			image_FFT[y][x].re = 0.0f;
			image_FFT[y][x].im = 0.0f;
		}
	}


	for(y = sizeY; y < padY; y++)
	{
		image_FFT[y] = new zomplex[padX];
		
		for(x = 0; x < padX; x++)
		{
			image_FFT[y][x].re = 0.0f;
			image_FFT[y][x].im = 0.0f;
		}
	}


		
	ComputeFFT(image_FFT, -1);

}


(4) /*------Function to perform convolution and take inverse FFT----*/
inline void ConvolutionOld::Convolve(float *output)
{
	
	int i,y,x;
	convolution_FFT = new zomplex*[padY];	

	//Do per pixel multiplication of the gaussian kernel and the image
matrix
	for(y = 0; y < padY; y++)
	{	
		convolution_FFT[y] = new zomplex[padX];
		for(x = 0; x < padX; x++)
		{
			convolution_FFT[y][x].re = (image_FFT[y][x].re *
kernel_FFT[y][x].re) - (image_FFT[y][x].im * kernel_FFT[y][x].im);
			convolution_FFT[y][x].im = (image_FFT[y][x].re *
kernel_FFT[y][x].im) + (image_FFT[y][x].im * kernel_FFT[y][x].re);
		}

	}

	ComputeFFT(convolution_FFT, 1); //Find inverse FFT
	

	for(y = 0; y < sizeY; y++)
	{	
		for(x = 0; x < sizeX; x++)
			output[x + y*sizeX] = convolution_FFT[y][x].re;

	}
	
}




This is a lot of code to post, but I am unable to understand the cause
of the error.  If someone has implemented 2D convolution using FFTW
library and knows about how or how not do zero paddings, please help
me.

-Swati
>(4) /*------Function to perform convolution and take inverse FFT----*/ >inline void ConvolutionOld::Convolve(float *output) >{ > > int i,y,x; > convolution_FFT = new zomplex*[padY]; > > //Do per pixel multiplication of the gaussian kernel and the image >matrix > for(y = 0; y < padY; y++) > { > convolution_FFT[y] = new zomplex[padX]; > for(x = 0; x < padX; x++) > { > convolution_FFT[y][x].re = (image_FFT[y][x].re * >kernel_FFT[y][x].re) - (image_FFT[y][x].im * kernel_FFT[y][x].im); > convolution_FFT[y][x].im = (image_FFT[y][x].re * >kernel_FFT[y][x].im) + (image_FFT[y][x].im * kernel_FFT[y][x].re); > } > > } > > ComputeFFT(convolution_FFT, 1); //Find inverse FFT > > > for(y = 0; y < sizeY; y++) > { > for(x = 0; x < sizeX; x++) > output[x + y*sizeX] = convolution_FFT[y][x].re; > > } > >} > > > > >This is a lot of code to post, but I am unable to understand the cause >of the error. If someone has implemented 2D convolution using FFTW >library and knows about how or how not do zero paddings, please help >me. > >-Swati >
You need to scale in your Convolution function - scale = 1.0 / (sizeX * sizeY); convolution_FFT[y][x].re = (image_FFT[y][x].re * kernel_FFT[y][x].re) - (image_FFT[y][x].im * kernel_FFT[y][x].im) * scale; convolution_FFT[y][x].im = (image_FFT[y][x].re * kernel_FFT[y][x].im) + (image_FFT[y][x].im * kernel_FFT[y][x].re) * scale; Otherwise this looks good. Sorry no one got to this sooner, thought I'd reply in case someone found it who was having a similar issue. More on fftw: http://www.fftw.org/fftw2_doc/fftw_2.html - Pat