Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
NE10_iir.c
1 /*
2  * Copyright 2012-15 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : dsp/NE10_iir.c
30  */
31 
32 #include "NE10_types.h"
33 
119  ne10_float32_t * pSrc,
120  ne10_float32_t * pDst,
121  ne10_uint32_t blockSize)
122 {
123  ne10_float32_t fcurr, fnext = 0, gcurr, gnext; /* Temporary variables for lattice stages */
124  ne10_float32_t acc; /* Accumlator */
125  ne10_uint32_t blkCnt, tapCnt; /* temporary variables for counts */
126  ne10_float32_t *px1, *px2, *pk, *pv; /* temporary pointers for state and coef */
127  ne10_uint32_t numStages = S->numStages; /* number of stages */
128  ne10_float32_t *pState; /* State pointer */
129  ne10_float32_t *pStateCurnt; /* State current pointer */
130 
131 
132  /* Run the below code for Cortex-M4 and Cortex-M3 */
133 
134  gcurr = 0.0f;
135  blkCnt = blockSize;
136 
137  pState = &S->pState[0];
138 
139  /* Sample processing */
140  while (blkCnt > 0u)
141  {
142  /* Read Sample from input buffer */
143  /* fN(n) = x(n) */
144  fcurr = *pSrc++;
145 
146  /* Initialize state read pointer */
147  px1 = pState;
148  /* Initialize state write pointer */
149  px2 = pState;
150  /* Set accumulator to zero */
151  acc = 0.0f;
152  /* Initialize Ladder coeff pointer */
153  pv = &S->pvCoeffs[S->numStages];
154  /* Initialize Reflection coeff pointer */
155  pk = &S->pkCoeffs[0];
156 
157 
158  /* Process sample for first tap */
159  gcurr = *px1++;
160  /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
161  fnext = fcurr - ( (*pk) * gcurr);
162  /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
163  gnext = (fnext * (*pk++)) + gcurr;
164  /* write gN(n) into state for next sample processing */
165  *px2++ = gnext;
166  /* y(n) += gN(n) * vN */
167  acc += (gnext * (*pv--));
168 
169  /* Update f values for next coefficient processing */
170  fcurr = fnext;
171 
172  /* Loop unrolling. Process 4 taps at a time. */
173  tapCnt = (numStages - 1u) >> 2;
174 
175  while (tapCnt > 0u)
176  {
177  /* Process sample for 2nd, 6th ...taps */
178  /* Read gN-2(n-1) from state buffer */
179  gcurr = *px1++;
180  /* Process sample for 2nd, 6th .. taps */
181  /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */
182  fnext = fcurr - ( (*pk) * gcurr);
183  /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */
184  gnext = (fnext * (*pk++)) + gcurr;
185  /* y(n) += gN-1(n) * vN-1 */
186  /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */
187  acc += (gnext * (*pv--));
188  /* write gN-1(n) into state for next sample processing */
189  *px2++ = gnext;
190 
191 
192  /* Process sample for 3rd, 7th ...taps */
193  /* Read gN-3(n-1) from state buffer */
194  gcurr = *px1++;
195  /* Process sample for 3rd, 7th .. taps */
196  /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */
197  fcurr = fnext - ( (*pk) * gcurr);
198  /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */
199  gnext = (fcurr * (*pk++)) + gcurr;
200  /* y(n) += gN-2(n) * vN-2 */
201  /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */
202  acc += (gnext * (*pv--));
203  /* write gN-2(n) into state for next sample processing */
204  *px2++ = gnext;
205 
206 
207  /* Process sample for 4th, 8th ...taps */
208  /* Read gN-4(n-1) from state buffer */
209  gcurr = *px1++;
210  /* Process sample for 4th, 8th .. taps */
211  /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */
212  fnext = fcurr - ( (*pk) * gcurr);
213  /* gN-3(n) = kN-3 * fN-4(n) + gN-4(n-1) */
214  gnext = (fnext * (*pk++)) + gcurr;
215  /* y(n) += gN-3(n) * vN-3 */
216  /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */
217  acc += (gnext * (*pv--));
218  /* write gN-3(n) into state for next sample processing */
219  *px2++ = gnext;
220 
221 
222  /* Process sample for 5th, 9th ...taps */
223  /* Read gN-5(n-1) from state buffer */
224  gcurr = *px1++;
225  /* Process sample for 5th, 9th .. taps */
226  /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */
227  fcurr = fnext - ( (*pk) * gcurr);
228  /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */
229  gnext = (fcurr * (*pk++)) + gcurr;
230  /* y(n) += gN-4(n) * vN-4 */
231  /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */
232  acc += (gnext * (*pv--));
233  /* write gN-4(n) into state for next sample processing */
234  *px2++ = gnext;
235 
236  tapCnt--;
237 
238  }
239 
240  fnext = fcurr;
241 
242  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
243  tapCnt = (numStages - 1u) % 0x4u;
244 
245  while (tapCnt > 0u)
246  {
247  gcurr = *px1++;
248  /* Process sample for last taps */
249  fnext = fcurr - ( (*pk) * gcurr);
250  gnext = (fnext * (*pk++)) + gcurr;
251  /* Output samples for last taps */
252  acc += (gnext * (*pv--));
253  *px2++ = gnext;
254  fcurr = fnext;
255 
256  tapCnt--;
257 
258  }
259 
260 
261  /* y(n) += g0(n) * v0 */
262  acc += (fnext * (*pv));
263 
264  *px2++ = fnext;
265 
266  /* write out into pDst */
267  *pDst++ = acc;
268 
269  /* Advance the state pointer by 1 to process the next sample */
270  pState = pState + 1u;
271  blkCnt--;
272 
273  }
274 
275  /* Processing is complete. Now copy the last S->numStages samples to the start of the buffer
276  in preparation for processing the next frame */
277 
278  /* Points to the start of the state buffer */
279  pStateCurnt = &S->pState[0];
280  pState = &S->pState[blockSize];
281 
282  tapCnt = numStages >> 2u;
283 
284  /* copy data */
285  while (tapCnt > 0u)
286  {
287  *pStateCurnt++ = *pState++;
288  *pStateCurnt++ = *pState++;
289  *pStateCurnt++ = *pState++;
290  *pStateCurnt++ = *pState++;
291 
292  /* Decrement the loop counter */
293  tapCnt--;
294 
295  }
296 
297  /* Calculate remaining number of copies */
298  tapCnt = (numStages) % 0x4u;
299 
300  /* Copy the remaining float32 state values */
301  while (tapCnt > 0u)
302  {
303  *pStateCurnt++ = *pState++;
304 
305  /* Decrement the loop counter */
306  tapCnt--;
307  }
308 
309 } //end of IIR_Lattice group
ne10_uint16_t numStages
Number of stages of the lattice filter.
Definition: NE10_types.h:421
ne10_float32_t * pkCoeffs
Points to the reflection coefficient array.
Definition: NE10_types.h:423
void ne10_iir_lattice_float_c(const ne10_iir_lattice_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_uint32_t blockSize)
Processing function for the floating-point IIR lattice filter.
Definition: NE10_iir.c:118
Instance structure for the floating point IIR Lattice filter.
Definition: NE10_types.h:419
ne10_float32_t * pvCoeffs
Points to the ladder coefficient array.
Definition: NE10_types.h:424
ne10_float32_t * pState
Points to the state variable array.
Definition: NE10_types.h:422