Fixed-point FIR Filter Code Generator for ARM Assembly

By Jordan Rhee, November 30, 2010. Coded in ASM for the ARM.

This is a fixed-point FIR filtering routine written in ARM assembly. We also provide an online tool that takes a Matlab coefficient file as input and generates a source file, header file, and example code as output. The code generator also works on IIR filters.


Detailed writeup and tutorial: http://ieee.ucsd.edu/wiki/tutorials:fixed_point_filtering_library

Code generator: http://ieee.ucsd.edu/projects/qfilt/


Note that while the example below has a lowpass filter response, the code can be used with any filter response by changing the coefficients.

The detailed writeup also contains flavors written in C and C++. The C version is the most versatile, but uses more memory and instructions than the C++ or ARM versions. The C++ version uses templates to eliminate a few pointers and constants by evaluating them at compile time. The hand-written ARM assembly version is the fastest and most memory-efficient, and leverages smlal, the 64-bit single-cycle multiply-accumulate instruction.


/* **********************************************************************
 * Fixed Point Filtering Library
 * **********************************************************************
 * lowpass_fir.S
 * Jordan Rhee
 * rhee.jordan@gmail.com
 * http://ieee.ucsd.edu
 * Generated with IEEE UCSD Fixed Point Filter Code Generator
 * http://ieee.ucsd.edu/projects/qfilt.php
 * **********************************************************************/

/*
 * fixedp lowpass_fir(fixedp *w, fixedp x);
 *
 * Fixed point FIR filtering routine for ARM. Computes output y for
 * input x. The output will have the same fracbits as the input.
 *  w: caller-allocated array for state storage. Should be length LENGTH+1.
 *  x: sample to filter
 *
 * Required data:
 *   LENGTH: number of coefficients
 *   .h: coefficient array
 *   H_FRACBITS: fracbits of coefficients
 *
 * Register usage:
 *   r0: address of internal state array. w[LENGTH] contains
 *       index of head of circular buffer.
 *   r1: x
 *   r2: address of coefficient array (h)
 *   r3: j: index of current state value
 *   r4: i: index of current coefficient
 *   r5: h[i]: current filter coefficient
 *   r6: w[j]: current state value
 *   r7: long multiply lo word
 *   r8: long multiply hi word
 */

.set LENGTH,  20			/* number of filter coefficients (taps) in the table below */
.set H_FRACBITS,  30		/* fractional bits of each coefficient; used as the output shift */

.section .rodata
.align 4					/* on ARM, gas takes this operand as a power of two (16-byte boundary) */

/*
 * Coefficient table h[0..LENGTH-1], one 32-bit word per tap.
 * lowpass_fir loads this address with "ldr r2, =.h", so the label must
 * exist; without it the symbol is undefined.
 */
.h:
    .word 0xffc5ef57, 0xfeb3416c, 0xfdf673b8, 0xffc7fb45
    .word 0x02b1826b, 0x0123c987, 0xfb542f40, 0xfc248828
    .word 0x0ab1bf40, 0x1b3f7457, 0x1b3f7457, 0x0ab1bf40
    .word 0xfc248828, 0xfb542f40, 0x0123c987, 0x02b1826b
    .word 0xffc7fb45, 0xfdf673b8, 0xfeb3416c, 0xffc5ef57


.text						/* leave .rodata: instructions belong in the code section */

.global	lowpass_fir
.func   lowpass_fir
/*
 * fixedp lowpass_fir(fixedp *w, fixedp x)
 *
 * Stores x into the circular state buffer w, accumulates the 64-bit sum
 * of h[i] * w[j] for i = 0..LENGTH-1 while j walks backwards from the
 * newest sample, and returns that sum shifted right by H_FRACBITS.
 *
 * In:    r0 = w (LENGTH state words followed by one word holding j)
 *        r1 = x
 * Out:   r0 = low 32 bits of (accumulator >> H_FRACBITS)
 * Uses:  r2, r3 and the flags are overwritten; r4-r8 are saved/restored.
 * Note:  the stored j is used as an index without any range check, so
 *        the caller must initialise w[LENGTH] to a value in 0..LENGTH-1.
 */
lowpass_fir:
	push {r4-r8}
	/* w(r0)[j(w[N])] = x */
	ldr  r3, [r0, #(4*LENGTH)]		/* load value of j */
	str  r1, [r0, r3, lsl #2]			/* store x into w[j] */

	/* y = 0; (r8:r7 is the 64-bit accumulator) */
	mov r7, #0
	mov r8, #0

	/* load base address of coefficient array */
	ldr r2, =.h

	/* i = 0 */
	mov r4, #0
	cmp r4, #LENGTH
	bge .endloop								/* skip the loop entirely when LENGTH is 0 */

.loop:
	/* y += h[i] * w[j] */
	ldr    r5, [r2, r4, lsl #2]					/* r5 = h[i] */
	ldr    r6, [r0, r3, lsl #2]					/* r6 = w[j] */
	smlal  r7, r8, r5, r6						/* r8:r7 += h[i] * w[j] (signed 32x32 -> 64) */
	subs r3, r3, #1								/* j-- */
	movmi r3, #(LENGTH - 1)						/* if j == -1, then j = N-1 */

	add   r4, r4, #1							/* i++ */
	cmp r4, #LENGTH								/* is i less than N */
	blt .loop

.endloop:
	/*
	 * j was decremented LENGTH times with wrap-around, so it is back at
	 * the slot just written. Advancing it by one selects the slot that the
	 * next call will overwrite.
	 */
	add r3, r3, #1								/* increment j and store back to memory */
	cmp r3, #LENGTH
	moveq r3, #0								/* wrap j from N back to 0 */
	str r3, [r0, #(4*LENGTH)]					/* save new value of j */

	/* r0 = bits [H_FRACBITS+31 : H_FRACBITS] of the accumulator r8:r7 */
	mov    r0, r7, lsr #H_FRACBITS				/* shift lo word to the right by H_FRACBITS */
	orr    r0, r0, r8, lsl #(32 - H_FRACBITS)	/* bring the low bits of the hi word in above them */

	pop {r4-r8}
	bx  lr