Fixed-point FIR Filter Code Generator for ARM Assembly
/* **********************************************************************
*
* Fixed Point Filtering Library
*
* **********************************************************************
*
* lowpass_fir.S
*
* Jordan Rhee
* rhee.jordan@gmail.com
*
* IEEE UCSD
* http://ieee.ucsd.edu
*
* Generated with IEEE UCSD Fixed Point Filter Code Generator
* http://ieee.ucsd.edu/projects/qfilt.php
*
* **********************************************************************/
/*
* fixedp lowpass_fir(fixedp *w, fixedp x);
*
* Fixed point FIR filtering routine for ARM. Computes output y for
* input x. The output will have the same fracbits as the input.
* w: caller-allocated array for state storage. Should be length LENGTH+1.
* x: sample to filter
*
* Required data:
* LENGTH: number of coefficients
* .h: coefficient array
* H_FRACBITS: fracbits of coefficients
*
* r0: address of internal state array. w[LENGTH] contains
* index of head of circular buffer.
* r1: x
* r2: address of coefficient array (h)
* r3: j: index of current state value
* r4: i: index of current coefficient
* r5: h[i]: current filter coefficient
* r6: w[j]: current state value
* r7: long multiply lo word
* r8: long multiply hi word
*/
/* Filter configuration: tap count and fractional-bit count of the table below. */
        .equ    LENGTH, 20              /* number of words stored at .h */
        .equ    H_FRACBITS, 30          /* fractional bits of each coefficient word */

        .section .rodata
        .balign 16                      /* same boundary as ".align 4": on ARM the .align operand is a power-of-two exponent */

/* Coefficient table h[0..LENGTH-1]; the values mirror about the midpoint (h[i] == h[LENGTH-1-i]). */
.h:
        .word   0xffc5ef57, 0xfeb3416c  /* h[0],  h[1]  */
        .word   0xfdf673b8, 0xffc7fb45  /* h[2],  h[3]  */
        .word   0x02b1826b, 0x0123c987  /* h[4],  h[5]  */
        .word   0xfb542f40, 0xfc248828  /* h[6],  h[7]  */
        .word   0x0ab1bf40, 0x1b3f7457  /* h[8],  h[9]  */
        .word   0x1b3f7457, 0x0ab1bf40  /* h[10], h[11] */
        .word   0xfc248828, 0xfb542f40  /* h[12], h[13] */
        .word   0x0123c987, 0x02b1826b  /* h[14], h[15] */
        .word   0xffc7fb45, 0xfdf673b8  /* h[16], h[17] */
        .word   0xfeb3416c, 0xffc5ef57  /* h[18], h[19] */
/*
 * lowpass_fir -- filter one sample through a LENGTH-tap fixed-point FIR (ARM state).
 *
 * In:   r0 = w, state array of LENGTH+1 words. w[0..LENGTH-1] is a circular
 *            buffer of past inputs; w[LENGTH] holds the head index j.
 *       r1 = x, the new input sample.
 * Out:  r0 = low 32 bits of (sum over i of h[i] * w[(j - i) mod LENGTH]) >> H_FRACBITS,
 *            where the sum is accumulated as a signed 64-bit value.
 * Side effects: w[j] = x and w[LENGTH] = (j + 1) mod LENGTH.
 * Registers: r2, r3 and the flags are overwritten; r4-r8 are saved and restored.
 *
 * Register roles inside the loop:
 *   r2 = &h[0]   r3 = j   r4 = i   r5 = h[i]   r6 = w[j]   r8:r7 = accumulator
 */
        .text
        .arm
        .global lowpass_fir
        .type   lowpass_fir, %function  /* mark the symbol as code so the linker can interwork calls to it */
        .func   lowpass_fir
lowpass_fir:
        push    {r4-r8}

        /*
         * j = w[LENGTH]. The index comes from caller-owned memory, so clamp it:
         * an uninitialised state array would otherwise make the store below
         * write outside w[]. The unsigned compare also rejects negative values.
         */
        ldr     r3, [r0, #(4*LENGTH)]
        cmp     r3, #LENGTH
        movhs   r3, #0
        str     r1, [r0, r3, lsl #2]    /* w[j] = x */

        /* 64-bit accumulator y = 0 */
        mov     r7, #0
        mov     r8, #0

        ldr     r2, =.h                 /* r2 = base of coefficient table */

        mov     r4, #0                  /* i = 0 */
        cmp     r4, #LENGTH
        bge     .Lfir_done              /* nothing to accumulate when LENGTH is 0 */

.Lfir_loop:
        ldr     r5, [r2, r4, lsl #2]    /* r5 = h[i] */
        ldr     r6, [r0, r3, lsl #2]    /* r6 = w[j] */
        smlal   r7, r8, r5, r6          /* r8:r7 += h[i] * w[j] (signed) */
        subs    r3, r3, #1              /* step j backwards through the buffer ... */
        movmi   r3, #(LENGTH - 1)       /* ... wrapping from -1 to LENGTH-1 */
        add     r4, r4, #1              /* i++ */
        cmp     r4, #LENGTH
        blt     .Lfir_loop

.Lfir_done:
        /*
         * After LENGTH wrapped decrements j is back at the slot written on
         * entry; advance the head by one (mod LENGTH) for the next call.
         */
        add     r3, r3, #1
        cmp     r3, #LENGTH
        moveq   r3, #0
        str     r3, [r0, #(4*LENGTH)]

        /* r0 = (r8:r7 >> H_FRACBITS), keeping the low 32 bits of the shifted value */
        mov     r0, r7, lsr #H_FRACBITS
        orr     r0, r0, r8, lsl #(32 - H_FRACBITS)

        pop     {r4-r8}
        bx      lr
        .size   lowpass_fir, . - lowpass_fir
        .endfunc
.end
Fixed-point IIR Filter Code Generator for ARM
/*
* fixedp lowpass_iir(fixedp *w, fixedp x);
*
* Fixed point IIR filtering routine for ARM. Computes output y for
* input x. The output will have the same fracbits as the input.
* w: caller-allocated array for state storage. Should be length 2*LENGTH (two state words per section).
* x: sample to filter
*
* Required data:
* LENGTH: number of sections
* .sos: sos matrix
* SOS_FRACBITS: sos fracbits
* .gain: scale values array
* G_FRACBITS: scale values fracbits
*
* Register usage:
* r0: address of internal state array (w)
* r1: x
* r2: address of SOS array
* r3: address of gain array
* r4: w0
* r5: w1
* r6: y
* r7: long multiply lo word
* r8: long multiply hi word
* r9: B1
* r10: B2
* r11: A1
* r12: A2
* r14: loop counter
*/
/* Cascade configuration: section count and fractional-bit counts of the two tables below. */
        .equ    LENGTH, 3               /* number of rows in .sos and of words in .gain */
        .equ    SOS_FRACBITS, 30        /* fractional bits of the .sos words */
        .equ    G_FRACBITS, 31          /* fractional bits of the .gain words */

        .section .rodata
        .balign 16                      /* same boundary as ".align 4": on ARM the .align operand is a power-of-two exponent */

/* One row of four words per section, stored in the order B1, B2, A1, A2. */
.sos:
        .word   0x49be7eaf, 0x40000000  /* section 0: B1, B2 */
        .word   0xc4c9d93a, 0x251f228c  /* section 0: A1, A2 */
        .word   0x1d81c8a5, 0x40000000  /* section 1: B1, B2 */
        .word   0xdaa0b600, 0x37cef3c1  /* section 1: A1, A2 */
        .word   0x40000000, 0x00000000  /* section 2: B1, B2 */
        .word   0xd87b730c, 0x00000000  /* section 2: A1, A2 */

/* One scale value per section, placed directly after the .sos rows. */
.gain:
        .word   0x06b1dbb5              /* gain[0] */
        .word   0x42fe27a0              /* gain[1] */
        .word   0x613d5d5a              /* gain[2] */
/*
 * lowpass_iir -- filter one sample through a cascade of LENGTH second-order
 * sections in fixed point (ARM state).
 *
 * In:   r0 = w, state array of 2*LENGTH words; section i owns w[2i] (w0)
 *            and w[2i+1] (w1).
 *       r1 = x, the new input sample.
 * Out:  r0 = output of the last section, or x unchanged when LENGTH is 0.
 * Side effects: both state words of every section are rewritten.
 * Registers: r1-r3, r12 and the flags are overwritten; r4-r11 and lr are
 *            saved and restored.
 *
 * Per section (transposed direct form II update with the b0 coefficient fixed at 1):
 *   x  = (gain[i] * x) >> G_FRACBITS
 *   y  = x + w0
 *   w0 = ((B1*x - A1*y) >> SOS_FRACBITS) + w1
 *   w1 =  (B2*x - A2*y) >> SOS_FRACBITS
 *   x  = y
 *
 * NOTE(review): A1/A2 are negated with rsb, which wraps for the value
 * 0x80000000; confirm the generator never emits that coefficient.
 */
        .text
        .arm
        .global lowpass_iir
        .type   lowpass_iir, %function  /* mark the symbol as code so the linker can interwork calls to it */
        .func   lowpass_iir
lowpass_iir:
        push    {r4-r11, lr}            /* lr is saved because r14 serves as the loop counter */

        mov     r14, #0                 /* i = 0 */
        ldr     r2, =.sos               /* r2 walks the SOS rows */
        ldr     r3, =.gain              /* r3 walks the gain words */
        cmp     r14, #LENGTH
        bge     .Liir_done              /* no sections: fall through with r1 still holding x */

.Liir_loop:
        /* r9 = B1, r10 = B2, r11 = A1, r12 = A2; r2 advances to the next row */
        ldmia   r2!, {r9-r12}

        /* x = (gain[i] * x) >> G_FRACBITS */
        ldr     r6, [r3], #4            /* r6 = gain[i]; r3 advances by one word */
        smull   r7, r8, r1, r6          /* r8:r7 = x * gain[i] */
        mov     r1, r7, lsr #G_FRACBITS
        orr     r1, r1, r8, lsl #(32 - G_FRACBITS)

        /* r4 = w0, r5 = w1; r0 is left pointing at w0 for the store below */
        ldm     r0, {r4-r5}

        /* y = x + w0 */
        add     r6, r1, r4

        /* w0 = ((B1*x - A1*y) >> SOS_FRACBITS) + w1 */
        rsb     r11, r11, #0            /* r11 = -A1 so the subtraction can be done with smlal */
        smull   r7, r8, r9, r1          /* r8:r7  = B1 * x */
        smlal   r7, r8, r11, r6         /* r8:r7 += -A1 * y */
        mov     r4, r7, lsr #SOS_FRACBITS
        orr     r4, r4, r8, lsl #(32 - SOS_FRACBITS)
        add     r4, r4, r5

        /* w1 = (B2*x - A2*y) >> SOS_FRACBITS */
        rsb     r12, r12, #0            /* r12 = -A2 */
        smull   r7, r8, r10, r1         /* r8:r7  = B2 * x */
        smlal   r7, r8, r12, r6         /* r8:r7 += -A2 * y */
        mov     r5, r7, lsr #SOS_FRACBITS
        orr     r5, r5, r8, lsl #(32 - SOS_FRACBITS)

        /* write the new w0, w1 back and step r0 to the next section's state */
        stmia   r0!, {r4-r5}

        /* this section's output feeds the next section */
        mov     r1, r6

        add     r14, r14, #1            /* i++ */
        cmp     r14, #LENGTH
        blt     .Liir_loop

.Liir_done:
        /*
         * Return r1 rather than r6: after any iteration r1 == r6 == y, and when
         * the loop was skipped r1 is still x whereas r6 was never written.
         */
        mov     r0, r1
        pop     {r4-r11, lr}
        bx      lr
        .size   lowpass_iir, . - lowpass_iir
        .endfunc
.end
Fixed-point IIR Filter Code Generator for ARM
/*
* fixedp lowpass_iir(fixedp *w, fixedp x);
*
* Fixed point IIR filtering routine for ARM. Computes output y for
* input x. The output will have the same fracbits as the input.
* w: caller-allocated array for state storage. Should be length 2*LENGTH (two state words per section).
* x: sample to filter
*
* Required data:
* LENGTH: number of sections
* .sos: sos matrix
* SOS_FRACBITS: sos fracbits
* .gain: scale values array
* G_FRACBITS: scale values fracbits
*
* Register usage:
* r0: address of internal state array (w)
* r1: x
* r2: address of SOS array
* r3: address of gain array
* r4: w0
* r5: w1
* r6: y
* r7: long multiply lo word
* r8: long multiply hi word
* r9: B1
* r10: B2
* r11: A1
* r12: A2
* r14: loop counter
*/
/* Cascade configuration: section count and fractional-bit counts of the two tables below. */
        .equ    LENGTH, 3               /* number of rows in .sos and of words in .gain */
        .equ    SOS_FRACBITS, 30        /* fractional bits of the .sos words */
        .equ    G_FRACBITS, 31          /* fractional bits of the .gain words */

        .section .rodata
        .balign 16                      /* same boundary as ".align 4": on ARM the .align operand is a power-of-two exponent */

/* One row of four words per section, stored in the order B1, B2, A1, A2. */
.sos:
        .word   0x49be7eaf, 0x40000000  /* section 0: B1, B2 */
        .word   0xc4c9d93a, 0x251f228c  /* section 0: A1, A2 */
        .word   0x1d81c8a5, 0x40000000  /* section 1: B1, B2 */
        .word   0xdaa0b600, 0x37cef3c1  /* section 1: A1, A2 */
        .word   0x40000000, 0x00000000  /* section 2: B1, B2 */
        .word   0xd87b730c, 0x00000000  /* section 2: A1, A2 */

/* One scale value per section, placed directly after the .sos rows. */
.gain:
        .word   0x06b1dbb5              /* gain[0] */
        .word   0x42fe27a0              /* gain[1] */
        .word   0x613d5d5a              /* gain[2] */
/*
 * lowpass_iir -- filter one sample through a cascade of LENGTH second-order
 * sections in fixed point (ARM state).
 *
 * In:   r0 = w, state array of 2*LENGTH words; section i owns w[2i] (w0)
 *            and w[2i+1] (w1).
 *       r1 = x, the new input sample.
 * Out:  r0 = output of the last section, or x unchanged when LENGTH is 0.
 * Side effects: both state words of every section are rewritten.
 * Registers: r1-r3, r12 and the flags are overwritten; r4-r11 and lr are
 *            saved and restored.
 *
 * Per section (transposed direct form II update with the b0 coefficient fixed at 1):
 *   x  = (gain[i] * x) >> G_FRACBITS
 *   y  = x + w0
 *   w0 = ((B1*x - A1*y) >> SOS_FRACBITS) + w1
 *   w1 =  (B2*x - A2*y) >> SOS_FRACBITS
 *   x  = y
 *
 * NOTE(review): A1/A2 are negated with rsb, which wraps for the value
 * 0x80000000; confirm the generator never emits that coefficient.
 */
        .text
        .arm
        .global lowpass_iir
        .type   lowpass_iir, %function  /* mark the symbol as code so the linker can interwork calls to it */
        .func   lowpass_iir
lowpass_iir:
        push    {r4-r11, lr}            /* lr is saved because r14 serves as the loop counter */

        mov     r14, #0                 /* i = 0 */
        ldr     r2, =.sos               /* r2 walks the SOS rows */
        ldr     r3, =.gain              /* r3 walks the gain words */
        cmp     r14, #LENGTH
        bge     .Liir_done              /* no sections: fall through with r1 still holding x */

.Liir_loop:
        /* r9 = B1, r10 = B2, r11 = A1, r12 = A2; r2 advances to the next row */
        ldmia   r2!, {r9-r12}

        /* x = (gain[i] * x) >> G_FRACBITS */
        ldr     r6, [r3], #4            /* r6 = gain[i]; r3 advances by one word */
        smull   r7, r8, r1, r6          /* r8:r7 = x * gain[i] */
        mov     r1, r7, lsr #G_FRACBITS
        orr     r1, r1, r8, lsl #(32 - G_FRACBITS)

        /* r4 = w0, r5 = w1; r0 is left pointing at w0 for the store below */
        ldm     r0, {r4-r5}

        /* y = x + w0 */
        add     r6, r1, r4

        /* w0 = ((B1*x - A1*y) >> SOS_FRACBITS) + w1 */
        rsb     r11, r11, #0            /* r11 = -A1 so the subtraction can be done with smlal */
        smull   r7, r8, r9, r1          /* r8:r7  = B1 * x */
        smlal   r7, r8, r11, r6         /* r8:r7 += -A1 * y */
        mov     r4, r7, lsr #SOS_FRACBITS
        orr     r4, r4, r8, lsl #(32 - SOS_FRACBITS)
        add     r4, r4, r5

        /* w1 = (B2*x - A2*y) >> SOS_FRACBITS */
        rsb     r12, r12, #0            /* r12 = -A2 */
        smull   r7, r8, r10, r1         /* r8:r7  = B2 * x */
        smlal   r7, r8, r12, r6         /* r8:r7 += -A2 * y */
        mov     r5, r7, lsr #SOS_FRACBITS
        orr     r5, r5, r8, lsl #(32 - SOS_FRACBITS)

        /* write the new w0, w1 back and step r0 to the next section's state */
        stmia   r0!, {r4-r5}

        /* this section's output feeds the next section */
        mov     r1, r6

        add     r14, r14, #1            /* i++ */
        cmp     r14, #LENGTH
        blt     .Liir_loop

.Liir_done:
        /*
         * Return r1 rather than r6: after any iteration r1 == r6 == y, and when
         * the loop was skipped r1 is still x whereas r6 was never written.
         */
        mov     r0, r1
        pop     {r4-r11, lr}
        bx      lr
        .size   lowpass_iir, . - lowpass_iir
        .endfunc
.end
Fixed-point FIR Filter Code Generator for ARM Assembly
/* **********************************************************************
*
* Fixed Point Filtering Library
*
* **********************************************************************
*
* lowpass_fir.S
*
* Jordan Rhee
* rhee.jordan@gmail.com
*
* IEEE UCSD
* http://ieee.ucsd.edu
*
* Generated with IEEE UCSD Fixed Point Filter Code Generator
* http://ieee.ucsd.edu/projects/qfilt.php
*
* **********************************************************************/
/*
* fixedp lowpass_fir(fixedp *w, fixedp x);
*
* Fixed point FIR filtering routine for ARM. Computes output y for
* input x. The output will have the same fracbits as the input.
* w: caller-allocated array for state storage. Should be length LENGTH+1.
* x: sample to filter
*
* Required data:
* LENGTH: number of coefficients
* .h: coefficient array
* H_FRACBITS: fracbits of coefficients
*
* r0: address of internal state array. w[LENGTH] contains
* index of head of circular buffer.
* r1: x
* r2: address of coefficient array (h)
* r3: j: index of current state value
* r4: i: index of current coefficient
* r5: h[i]: current filter coefficient
* r6: w[j]: current state value
* r7: long multiply lo word
* r8: long multiply hi word
*/
/* Filter configuration: tap count and fractional-bit count of the table below. */
        .equ    LENGTH, 20              /* number of words stored at .h */
        .equ    H_FRACBITS, 30          /* fractional bits of each coefficient word */

        .section .rodata
        .balign 16                      /* same boundary as ".align 4": on ARM the .align operand is a power-of-two exponent */

/* Coefficient table h[0..LENGTH-1]; the values mirror about the midpoint (h[i] == h[LENGTH-1-i]). */
.h:
        .word   0xffc5ef57, 0xfeb3416c  /* h[0],  h[1]  */
        .word   0xfdf673b8, 0xffc7fb45  /* h[2],  h[3]  */
        .word   0x02b1826b, 0x0123c987  /* h[4],  h[5]  */
        .word   0xfb542f40, 0xfc248828  /* h[6],  h[7]  */
        .word   0x0ab1bf40, 0x1b3f7457  /* h[8],  h[9]  */
        .word   0x1b3f7457, 0x0ab1bf40  /* h[10], h[11] */
        .word   0xfc248828, 0xfb542f40  /* h[12], h[13] */
        .word   0x0123c987, 0x02b1826b  /* h[14], h[15] */
        .word   0xffc7fb45, 0xfdf673b8  /* h[16], h[17] */
        .word   0xfeb3416c, 0xffc5ef57  /* h[18], h[19] */
/*
 * lowpass_fir -- filter one sample through a LENGTH-tap fixed-point FIR (ARM state).
 *
 * In:   r0 = w, state array of LENGTH+1 words. w[0..LENGTH-1] is a circular
 *            buffer of past inputs; w[LENGTH] holds the head index j.
 *       r1 = x, the new input sample.
 * Out:  r0 = low 32 bits of (sum over i of h[i] * w[(j - i) mod LENGTH]) >> H_FRACBITS,
 *            where the sum is accumulated as a signed 64-bit value.
 * Side effects: w[j] = x and w[LENGTH] = (j + 1) mod LENGTH.
 * Registers: r2, r3 and the flags are overwritten; r4-r8 are saved and restored.
 *
 * Register roles inside the loop:
 *   r2 = &h[0]   r3 = j   r4 = i   r5 = h[i]   r6 = w[j]   r8:r7 = accumulator
 */
        .text
        .arm
        .global lowpass_fir
        .type   lowpass_fir, %function  /* mark the symbol as code so the linker can interwork calls to it */
        .func   lowpass_fir
lowpass_fir:
        push    {r4-r8}

        /*
         * j = w[LENGTH]. The index comes from caller-owned memory, so clamp it:
         * an uninitialised state array would otherwise make the store below
         * write outside w[]. The unsigned compare also rejects negative values.
         */
        ldr     r3, [r0, #(4*LENGTH)]
        cmp     r3, #LENGTH
        movhs   r3, #0
        str     r1, [r0, r3, lsl #2]    /* w[j] = x */

        /* 64-bit accumulator y = 0 */
        mov     r7, #0
        mov     r8, #0

        ldr     r2, =.h                 /* r2 = base of coefficient table */

        mov     r4, #0                  /* i = 0 */
        cmp     r4, #LENGTH
        bge     .Lfir_done              /* nothing to accumulate when LENGTH is 0 */

.Lfir_loop:
        ldr     r5, [r2, r4, lsl #2]    /* r5 = h[i] */
        ldr     r6, [r0, r3, lsl #2]    /* r6 = w[j] */
        smlal   r7, r8, r5, r6          /* r8:r7 += h[i] * w[j] (signed) */
        subs    r3, r3, #1              /* step j backwards through the buffer ... */
        movmi   r3, #(LENGTH - 1)       /* ... wrapping from -1 to LENGTH-1 */
        add     r4, r4, #1              /* i++ */
        cmp     r4, #LENGTH
        blt     .Lfir_loop

.Lfir_done:
        /*
         * After LENGTH wrapped decrements j is back at the slot written on
         * entry; advance the head by one (mod LENGTH) for the next call.
         */
        add     r3, r3, #1
        cmp     r3, #LENGTH
        moveq   r3, #0
        str     r3, [r0, #(4*LENGTH)]

        /* r0 = (r8:r7 >> H_FRACBITS), keeping the low 32 bits of the shifted value */
        mov     r0, r7, lsr #H_FRACBITS
        orr     r0, r0, r8, lsl #(32 - H_FRACBITS)

        pop     {r4-r8}
        bx      lr
        .size   lowpass_fir, . - lowpass_fir
        .endfunc
.end