Fixed-point Math Library

Jordan November 30, 2010 Coded in C
/* **********************************************************************
 *
 * Fixed Point Math Library
 *
 * **********************************************************************
 *
 * qmath.h
 *
 * Alex Forencich
 * alex@alexelectronics.com
 *
 * Jordan Rhee
 * rhee.jordan@gmail.com
 *
 * IEEE UCSD
 * http://ieee.ucsd.edu
 *
 * **********************************************************************/

#ifndef _QMATH_H_
#define _QMATH_H_

#ifdef __cplusplus
extern "C" {
#endif

#ifndef INLINE
#ifdef _MSC_VER
#define INLINE __inline
#else
#define INLINE inline
#endif /* _MSC_VER */
#endif /* INLINE */

/*
 * Default fractional bits. This precision is used in the routines
 * and macros without a leading underscore. 
 * For example, if you are mostly working with values that come from
 * a 10-bit A/D converter, you may want to choose 21. This leaves 11
 * bits for the whole part, which will help avoid overflow in addition.
 * On ARM, bit shifts require a single cycle, so all fracbits 
 * require the same amount of time to compute and there is no advantage
 * to selecting fracbits that are a multiple of 8.
 */
#define	FIXED_FRACBITS 24

#define FIXED_RESOLUTION (1 << FIXED_FRACBITS)
#define FIXED_INT_MASK (0xffffffffL << FIXED_FRACBITS)
#define FIXED_FRAC_MASK (~FIXED_INT_MASK)

// square roots
#define FIXED_SQRT_ERR 1 << (FIXED_FRACBITS - 10)

// fixedp2a
#define FIXED_DECIMALDIGITS 6

typedef long fixedp;

// conversions for arbitrary fracbits
#define _short2q(x, fb)			((fixedp)((x) << (fb)))
#define _int2q(x, fb)			((fixedp)((x) << (fb)))
#define _long2q(x, fb)			((fixedp)((x) << (fb)))
#define _float2q(x, fb)			((fixedp)((x) * (1 << (fb))))
#define _double2q(x, fb)		((fixedp)((x) * (1 << (fb))))

// conversions for default fracbits
#define short2q(x)			_short2q(x, FIXED_FRACBITS)
#define int2q(x)			_int2q(x, FIXED_FRACBITS)
#define long2q(x)			_long2q(x, FIXED_FRACBITS)
#define float2q(x)			_float2q(x, FIXED_FRACBITS)
#define double2q(x)			_double2q(x, FIXED_FRACBITS)

// conversions for arbitrary fracbits
#define _q2short(x, fb)		((short)((x) >> (fb)))
#define _q2int(x, fb)		((int)((x) >> (fb)))
#define _q2long(x, fb)		((long)((x) >> (fb)))
#define _q2float(x, fb)		((float)(x) / (1 << (fb)))
#define _q2double(x, fb)	((double)(x) / (1 << (fb)))

// conversions for default fracbits
#define q2short(x)			_q2short(x, FIXED_FRACBITS)
#define q2int(x)			_q2int(x, FIXED_FRACBITS)
#define q2long(x)			_q2long(x, FIXED_FRACBITS)
#define q2float(x)			_q2float(x, FIXED_FRACBITS)
#define q2double(x)			_q2double(x, FIXED_FRACBITS)

// evaluates to the whole (integer) part of x
#define qipart(x)			q2long(x)

// evaluates to the fractional part of x
#define qfpart(x)			((x) & FIXED_FRAC_MASK)

/*
 * Constants
 */
#define _QPI      3.1415926535897932384626433832795
#define QPI      double2q(_QPI)
#define _Q2PI     6.283185307179586476925286766559
#define Q2PI     double2q(_Q2PI)
#define _QPIO2    1.5707963267948966192313216916398
#define QPIO2    double2q(_QPIO2)
#define _QPIO4    0.78539816339744830961566084581988
#define QPIO4    double2q(_QPIO4)
#define _QLN_E    2.71828182845904523536
#define QLN_E    double2q(_QLN_E)
#define _QLN_10   2.30258509299404568402
#define QLN_10   double2q(_QLN_10)
#define _Q1OLN_10 0.43429448190325182765
#define Q1OLN_10 double2q(_Q1OLN_10)

// Both operands in addition and subtraction must have the same fracbits.
// If you need to add or subtract fixed point numbers with different
// fracbits, then use q2q to convert each operand beforehand.
#define qadd(a, b) ((a) + (b))
#define qsub(a, b) ((a) - (b))

/**
 * q2q - convert one fixed point type to another
 * x - the fixed point number to convert
 * xFb - source fracbits
 * yFb - destination fracbits
 */
static INLINE fixedp q2q(fixedp x, int xFb, int yFb)
{
	if(xFb == yFb) {
		return x;
	} else if(xFb < yFb) {
		return x << (yFb - xFb);
	} else {
		return x >> (xFb - yFb);
	}
}

/**
 * Multiply two fixed point numbers with arbitrary fracbits
 * x - left operand
 * y - right operand
 * xFb - number of fracbits for X
 * yFb - number of fracbits for Y
 * resFb - number of fracbits for the result
 * 
 */
#define _qmul(x, y, xFb, yFb, resFb) ((fixedp)(((long long)(x) * (long long)(y)) >> ((xFb) + (yFb) - (resFb))))

/**
 * Fixed point multiply for default fracbits.
 */
#define qmul(x, y) _qmul(x, y, FIXED_FRACBITS, FIXED_FRACBITS, FIXED_FRACBITS)

/**
 * divide
 * shift into 64 bits and divide, then truncate
 */
#define _qdiv(x, y, xFb, yFb, resFb) ((fixedp)((((long long)x) << ((xFb) + (yFb) - (resFb))) / y))

/**
 * Fixed point divide for default fracbbits.
 */
#define qdiv(x, y) _qdiv(x, y, FIXED_FRACBITS, FIXED_FRACBITS, FIXED_FRACBITS)

/**
 * Invert a number (x^-1) for arbitrary fracbits
 */
#define _qinv(x, xFb, resFb) ((fixedp)((((long long)1) << (xFb + resFb)) / x))

/**
 * Invert a number with default fracbits.
 */
#define qinv(x) _qinv(x, FIXED_FRACBITS, FIXED_FRACBITS);

/**
 * Modulus. Modulus is only defined for two numbers of the same fracbits
 */
#define qmod(x, y) ((x) % (y))

/**
 * Absolute value. Works for any fracbits.
 */
#define qabs(x) (((x) < 0) ? (-x) : (x))

/**
 * Floor for arbitrary fracbits
 */
#define _qfloor(x, fb) ((x) & (0xffffffff << (fb)))

/**
 * Floor for default fracbits
 */
#define qfloor(x) _qfloor(x, FIXED_FRACBITS)

/**
 * ceil for arbitrary fracbits.
 */
static INLINE fixedp _qceil(fixedp x, int fb)
{
	// masks off fraction bits and adds one if there were some fractional bits
	fixedp f = _qfloor(x, fb);
	if (f != x) return qadd(f, _int2q(1, fb));
	return x;
}

/**
 * ceil for default fracbits
 */
#define qceil(x) _qceil(x, FIXED_FRACBITS)

/**
 * square root for default fracbits
 */
fixedp qsqrt(fixedp p_Square);

/**
 * log (base e) for default fracbits
 */
fixedp qlog( fixedp p_Base );

/**
 * log base 10 for default fracbits
 */
fixedp qlog10( fixedp p_Base );

/**
 * exp (e to the x) for default fracbits
 */
fixedp qexp(fixedp p_Base);

/**
 * pow for default fracbits
 */
fixedp qpow( fixedp p_Base, fixedp p_Power );

/**
 * sine for default fracbits
 */
fixedp qsin(fixedp theta);

/**
 * cosine for default fracbits
 */
fixedp qcos(fixedp theta);

/**
 * tangent for default fracbits
 */
fixedp qtan(fixedp theta);

/**
 * fixedp2a - converts a fixed point number with default fracbits to a string
 */
char *q2a(char *buf, fixedp n);

#ifdef __cplusplus
}	// extern C
#endif

#endif /* _QMATH_H_ */

/* **********************************************************************
 *
 * Fixed Point Math Library
 *
 * **********************************************************************
 *
 * qmath.c
 *
 * Alex Forencich
 * alex@alexelectronics.com
 *
 * Jordan Rhee
 * rhee.jordan@gmail.com
 *
 * IEEE UCSD
 * http://ieee.ucsd.edu
 *
 * **********************************************************************/

#include "qmath.h"

/**
 * square root
 */
fixedp qsqrt(fixedp p_Square)
{
	fixedp   res;
	fixedp   delta;
	fixedp   maxError;

	if ( p_Square <= 0 )
		return 0;

	/* start at half */
	res = (p_Square >> 1);

	/* determine allowable error */
	maxError =  qmul(p_Square, FIXED_SQRT_ERR);

	do
	{
		delta =  (qmul( res, res ) - p_Square);
		res -=  qdiv(delta, ( res << 1 ));
	}
	while ( delta > maxError || delta < -maxError );

	return res;
}

/**
 * log (base e)
 */
fixedp qlog( fixedp p_Base )
{
    fixedp w = 0;
	fixedp y = 0;
	fixedp z = 0;
	fixedp num = int2q(1);
	fixedp dec = 0;

	if ( p_Base == int2q(1) )
		return 0;

	if ( p_Base == 0 )
		return 0xffffffff;

	for ( dec=0 ; qabs( p_Base ) >= int2q(2) ; dec += int2q(1) )
		p_Base = qdiv(p_Base, QLN_E);

	p_Base -= int2q(1);
	z = p_Base;
	y = p_Base;
	w = int2q(1);

	while ( y != y + w )
	{
		z = 0 - qmul( z , p_Base );
		num += int2q(1);
		w = qdiv( z , num );
		y += w;
	}

	return y + dec;
}

/**
 * log base 10
 */
fixedp qlog10( fixedp p_Base )
{
    // ln(p_Base)/ln(10)
    // more accurately, ln(p_Base) * (1/ln(10))
    return qmul(qlog(p_Base), Q1OLN_10);
}

/**
 * exp (e to the x)
 */
fixedp qexp(fixedp p_Base)
{
	fixedp w;
	fixedp y;
	fixedp num;

	for ( w=int2q(1), y=int2q(1), num=int2q(1) ; y != y+w ; num += int2q(1) )
	{
		w = qmul(w, qdiv(p_Base, num));
		y += w;
	}

	return y;
}

/**
 * pow
 */
fixedp qpow( fixedp p_Base, fixedp p_Power )
{
	if ( p_Base < 0 && qmod(p_Power, int2q(2)) != 0 )
		return - qexp( qmul(p_Power, qlog( -p_Base )) );
	else
		return qexp( qmul(p_Power, qlog(qabs( p_Base ))) );
}

/**
 * sinx, internal sine function
 */
static fixedp sinx(fixedp x)
{
	fixedp xpwr;
	long xftl;
	fixedp xresult;
	short positive;
	int i;

	xresult = 0;
	xpwr = x;
	xftl = 1;
	positive = -1;

	// Note: 12! largest for long
	for (i = 1; i < 7; i+=2)
	{
		if ( positive )
			xresult += qdiv(xpwr, long2q(xftl));
		else
			xresult -= qdiv(xpwr, long2q(xftl));

		xpwr = qmul(x, xpwr);
		xpwr = qmul(x, xpwr);
		xftl *= i+1;
		xftl *= i+2;
		positive = !positive;
	}

	return xresult;
}

/**
 * sine
 */
fixedp qsin(fixedp theta)
{
	fixedp xresult;
	short bBottom = 0;
	//static fixed xPI = XPI;
	//static fixed x2PI = X2PI;
	//static fixed xPIO2 = XPIO2;

	fixedp x = qmod(theta, Q2PI);
	if ( x < 0 )
		x += Q2PI;

	if ( x > QPI )
	{
		bBottom = -1;
		x -= QPI;
	}

	if ( x <= QPIO2 )
		xresult = sinx(x);
	else
		xresult = sinx(QPIO2-(x-QPIO2));

	if ( bBottom )
		return -xresult;

	return xresult;
}

/**
 * cosine
 */
fixedp qcos(fixedp theta)
{
	return qsin(theta + QPIO2);
}

/**
 * tangent
 */
fixedp qtan(fixedp theta)
{
	return qdiv(qsin(theta), qcos(theta));
}

/**
 * q2a - converts a fixed point number to a string
 */
char *q2a(char *buf, fixedp n)
{
	long ipart = qipart(n);
	long fpart = qfpart(n);
	long intdigits = 0;

	int i = 0;
	int j = 0;
	int d = 0;

	int offset = 0;

	long v;
	long t;
	long st = 0;

	if (n != 0)
	{
		intdigits = qipart(qceil(qlog10(qabs(n))));
	}

	if (intdigits < 1) intdigits = 1;

	offset = intdigits - 1;

	if (n < 0)
	{
		buf[0] = '-';
		offset++;
		n = -n;
		ipart = -ipart;
		fpart = -fpart;
	}

	// integer portion

	for (i = 0; i < intdigits; i++)
	{
		j = offset - i;
		d = ipart % 10;
		ipart = ipart / 10;
		buf[j] = '0' + d;
	}

	// decimal point

	buf[offset + 1] = '.';

	// fractional portion

	v = 5;
	t = FIXED_RESOLUTION >> 1;

	for (i = 0; i < FIXED_DECIMALDIGITS - 1; i++)
	{
		v = (v << 1) + (v << 3);
	}

	for (i = 0; i < FIXED_FRACBITS; i++)
	{
		if (fpart & t)
		{
			st += v;
		}
		v = v >> 1;
		t = t >> 1;
	}

	offset += FIXED_DECIMALDIGITS + 1;

	for (i = 0; i < FIXED_DECIMALDIGITS; i++)
	{
		j = offset - i;
		d = st % 10;
		st = st / 10;
		buf[j] = '0' + d;
	}

	// ending zero
	buf[offset + 1] = '\0';

	return buf;
}

Fixed-point IIR Filter Code Generator for ARM

Jordan November 30, 2010 Coded in ASM for the ARM
/* 
 * fixedp lowpass_iir(fixedp *w, fixedp x);
 *
 * Fixed point IIR filtering routine for ARM. Computes output y for
 * input x. The output will have the same fracbits as the input.
 *  w: caller-allocated array for state storage. Should be length 2*L.
 *  x: sample to filter
 * 
 * Required data:
 *   LENGTH: number of sections
 *   .sos: sos matrix
 *   SOS_FRACBITS: sos fracbits
 *   .gain: scale values array
 *   G_FRACBITS: scale values fracbits
 *   
 * Register usage:
 *   r0: address of internal state array (w)
 *   r1: x
 *   r2: address of SOS array
 *   r3: address of gain array
 *   r4: w0
 *   r5: w1
 *   r6: y
 *   r7: long multiply lo word
 *   r8: long multiply hi word
 *   r9:  B1
 *   r10: B2
 *   r11: A1
 *   r12: A2
 *   r14: loop counter
 */

.set LENGTH,  3
.set SOS_FRACBITS,  30
.set G_FRACBITS,  31

.section .rodata
.align 4

.sos:
    .word 0x49be7eaf, 0x40000000, 0xc4c9d93a, 0x251f228c
    .word 0x1d81c8a5, 0x40000000, 0xdaa0b600, 0x37cef3c1
    .word 0x40000000, 0x00000000, 0xd87b730c, 0x00000000

.gain:

    .word 0x06b1dbb5, 0x42fe27a0, 0x613d5d5a

.text
.arm

.global	lowpass_iir
.func   lowpass_iir
lowpass_iir:
	push {r4-r11, lr}

	mov  r14, #0								/* i = 0 */
	ldr  r2, =.sos								/* load address of SOS matrix */
	ldr  r3, =.gain								/* load address of gain coefficient array */

	cmp  r14, #LENGTH
	bge  .endloop

.loop:
	/* load all the SOS coefficients we need into r8-r12 and increment the SOS pointer */
	ldmia r2!, {r9-r12}							/* B1, B2, A1, A2 */

	/* x = gain[i]*x */
	ldr    r6, [r3], #4							/* load current element of gain array into r6 and increment by 4 */
	smull  r7, r8, r1, r6						/* 64-bit multiply: r5:r4 = x*gain[i]; */
	mov    r1, r7, lsr#G_FRACBITS				/* shift lo word to the right by G_FRACBITS */
	orr    r1, r1, r8, lsl#(32 - G_FRACBITS)		/* shift hi word to the right by G_FRACBITS  and OR with lo word*/

	/* load w0 and w1 into r4, r5, but do NOT increment */
	ldm    r0, {r4-r5}

	/* y(r6) = x(r1) + w[W0](r4)*/
	add r6, r1, r4

	/* w0(r4) = .sos[B1](r9)*x(r1) - .sos[A1](r11)*y(r6) + w[W1](r5); */
	rsb    r11, r11, #0							/* .sos[A1] = -sos[A1] */
	smull  r7, r8, r9, r1						/* r8:r7 = sos[B1]*x */
	smlal  r7, r8, r11, r6						/* r8:r7 += -sos[A1]*y */
	mov    r4, r7, lsr#SOS_FRACBITS				/* shift lo word to the right by SOS_FRACBITS */
	orr    r4, r4, r8, lsl#(32 - SOS_FRACBITS)	/* shift hi word to the right by SOS_FRACBITS and OR with lo word*/
	add    r4, r4, r5							/* add w1 */

	/* w2 = sos[B2]*x(r1) - .sos[A2](r12)*y(r6); */
	rsb    r12, r12, #0							/* .sos[A2] = -sos[A2] */
	smull  r7, r8, r10, r1                      /* r8:r7 = sos[B2](r10)*x(r1) */
	smlal  r7, r8, r12, r6						/* r8:r7 += -sos[A2](r12)*y(r6) */
	mov    r5, r7, lsr#SOS_FRACBITS				/* shift lo word to the right by SOS_FRACBITS */
	orr    r5, r5, r8, lsl#(32 - SOS_FRACBITS)	/* shift hi word to the right by SOS_FRACBITS and OR with lo word*/

	/* need to store w0, w1 back to memory and increment */
	stmia r0!, {r4-r5}

	/* x = y */
	mov    r1, r6

	/* increment pointer and branch to top of loop */
	add r14, r14, #1
	cmp r14, #LENGTH
	blt   .loop

.endloop:
	/* set return val, restore stack, and return */
	mov r0, r6
	pop {r4-r11, lr}
	bx  lr

.endfunc
.end

Fixed-point FIR Filter Code Generator for ARM Assembly

Jordan November 30, 2010 Coded in ASM for the ARM
/* **********************************************************************
 *
 * Fixed Point Filtering Library
 *
 * **********************************************************************
 *
 * lowpass_fir.S
 *
 * Jordan Rhee
 * rhee.jordan@gmail.com
 *
 * IEEE UCSD
 * http://ieee.ucsd.edu
 *
 * Generated with IEEE UCSD Fixed Pointer Filter Code Generator
 * http://ieee.ucsd.edu/projects/qfilt.php
 *
 * **********************************************************************/

/* 
 * fixedp lowpass_fir(fixedp *w, fixedp x);
 * 
 * Fixed point FIR filtering routine for ARM. Computes output y for
 * input x. The output will have the same fracbits as the input.
 *  w: caller-allocated array for state storage. Should be length LENGTH+1.
 *  x: sample to filter
 *
 * Required data:
 *   LENGTH: number of coefficients
 *   .h: coefficient array
 *   H_FRACBITS: fracbits of coefficients
 *   
 *   r0: address of internal state array. w[LENGTH] contains
 *       index of head of circular buffer.
 *   r1: x
 *   r2: address of coefficient array (h)
 *   r3: j: index of current state value
 *   r4: i: index of current coefficient
 *   r5: h[i]: current filter coefficient
 *   r6: w[j]: current state value
 *   r7: long multiply lo word
 *   r8: long multiply hi word
 */

.set LENGTH,  20
.set H_FRACBITS,  30

.section .rodata
.align 4

.h:

    .word 0xffc5ef57, 0xfeb3416c, 0xfdf673b8, 0xffc7fb45
    .word 0x02b1826b, 0x0123c987, 0xfb542f40, 0xfc248828
    .word 0x0ab1bf40, 0x1b3f7457, 0x1b3f7457, 0x0ab1bf40
    .word 0xfc248828, 0xfb542f40, 0x0123c987, 0x02b1826b
    .word 0xffc7fb45, 0xfdf673b8, 0xfeb3416c, 0xffc5ef57

.text
.arm

.global	lowpass_fir
.func   lowpass_fir
lowpass_fir:
	push {r4-r8}
	
	/* w(r0)[j(w[N])] = x */
	ldr  r3, [r0, #(4*LENGTH)]		/* load value of j */
	str  r1, [r0, r3, lsl #2]			/* store x into w[j] */

	/* y = 0; */
	mov r7, #0
	mov r8, #0

	/* load base address of coefficient array */
	ldr r2, =.h

	/* i = 0 */
	mov r4, #0
	cmp r4, #LENGTH
	bge .endloop

.loop:
	/* y += h[i] * w[j] */
	ldr    r5, [r2, r4, lsl #2]					/* r5 = h[i] */
	ldr    r6, [r0, r3, lsl #2]					/* r6 = w[j] */
	smlal  r7, r8, r5, r6						/* r8:r7 += h[i] * w[j] */
	
	subs r3, r3, #1								/* j-- */
	movmi r3, #(LENGTH - 1)						/* if j == -1, then j = N-1 */

	add   r4, r4, #1							/* i++ */
	cmp r4, #LENGTH								/* is i less than N */
	blt .loop

.endloop:
	add r3, r3, #1								/* increment j and store back to memory */
	cmp r3, #LENGTH
	moveq r3, #0
	str r3, [r0, #(4*LENGTH)]					/* save new value of j */
	
	mov    r0, r7, lsr #H_FRACBITS				/* shift lo word to the right by H_FRACBITS */
	orr    r0, r0, r8, lsl #(32 - H_FRACBITS)	/* shift hi word to the right by H_FRACBITS and OR with lo word*/

	pop {r4-r8}
	bx  lr

.endfunc
.end