Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
NE10_init_dsp.c
1 /*
2  * Copyright 2012-15 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <stdio.h>
29 
30 #include "NE10_dsp.h"
31 
32 ne10_result_t ne10_init_dsp (ne10_int32_t is_NEON_available)
33 {
34  if (NE10_OK == is_NEON_available)
35  {
36  ne10_fft_alloc_c2c_float32 = ne10_fft_alloc_c2c_float32_neon;
37  ne10_fft_alloc_c2c_int32 = ne10_fft_alloc_c2c_int32_neon;
38  ne10_fft_c2c_1d_float32 = ne10_fft_c2c_1d_float32_neon;
39  ne10_fft_r2c_1d_float32 = ne10_fft_r2c_1d_float32_neon;
40  ne10_fft_c2r_1d_float32 = ne10_fft_c2r_1d_float32_neon;
41 
42  ne10_fft_c2c_1d_int32 = ne10_fft_c2c_1d_int32_neon;
43  ne10_fft_r2c_1d_int32 = ne10_fft_r2c_1d_int32_neon;
44  ne10_fft_c2r_1d_int32 = ne10_fft_c2r_1d_int32_neon;
45 
46  ne10_fft_c2c_1d_int16 = ne10_fft_c2c_1d_int16_neon;
47  ne10_fft_c2r_1d_int16 = ne10_fft_c2r_1d_int16_neon;
48  ne10_fft_r2c_1d_int16 = ne10_fft_r2c_1d_int16_neon;
49 
50 #ifdef ENABLE_NE10_FIR_FLOAT_NEON
51  ne10_fir_float = ne10_fir_float_neon;
52 #else
53  ne10_fir_float = ne10_fir_float_c;
54 #endif // ENABLE_NE10_FIR_FLOAT_NEON
55 
56 #ifdef ENABLE_NE10_FIR_DECIMATE_FLOAT_NEON
57  ne10_fir_decimate_float = ne10_fir_decimate_float_neon;
58 #else
59  ne10_fir_decimate_float = ne10_fir_decimate_float_c;
60 #endif // ENABLE_NE10_FIR_DECIMATE_FLOAT_NEON
61 
62 #ifdef ENABLE_NE10_FIR_INTERPOLATE_FLOAT_NEON
63  ne10_fir_interpolate_float = ne10_fir_interpolate_float_neon;
64 #else
65  ne10_fir_interpolate_float = ne10_fir_interpolate_float_c;
66 #endif // ENABLE_NE10_FIR_INTERPOLATE_FLOAT_NEON
67 
68 #ifdef ENABLE_NE10_FIR_LATTICE_FLOAT_NEON
69  ne10_fir_lattice_float = ne10_fir_lattice_float_neon;
70 #else
71  ne10_fir_lattice_float = ne10_fir_lattice_float_c;
72 #endif // ENABLE_NE10_FIR_LATTICE_FLOAT_NEON
73 
74 #ifdef ENABLE_NE10_FIR_SPARSE_FLOAT_NEON
75  ne10_fir_sparse_float = ne10_fir_sparse_float_neon;
76 #else
77  ne10_fir_sparse_float = ne10_fir_sparse_float_c;
78 #endif // ENABLE_NE10_FIR_SPARSE_FLOAT_NEON
79 
80 #ifdef ENABLE_NE10_IIR_LATTICE_FLOAT_NEON
81  ne10_iir_lattice_float = ne10_iir_lattice_float_neon;
82 #else
83  ne10_iir_lattice_float = ne10_iir_lattice_float_c;
84 #endif // ENABLE_NE10_IIR_LATTICE_FLOAT_NEON
85  }
86  else
87  {
88  ne10_fft_alloc_c2c_float32 = ne10_fft_alloc_c2c_float32_c;
89  ne10_fft_alloc_c2c_int32 = ne10_fft_alloc_c2c_int32_c;
90  ne10_fft_c2c_1d_float32 = ne10_fft_c2c_1d_float32_c;
91  ne10_fft_r2c_1d_float32 = ne10_fft_r2c_1d_float32_c;
92  ne10_fft_c2r_1d_float32 = ne10_fft_c2r_1d_float32_c;
93 
94  ne10_fft_c2c_1d_int32 = ne10_fft_c2c_1d_int32_c;
95  ne10_fft_r2c_1d_int32 = ne10_fft_r2c_1d_int32_c;
96  ne10_fft_c2r_1d_int32 = ne10_fft_c2r_1d_int32_c;
97 
98  ne10_fft_c2c_1d_int16 = ne10_fft_c2c_1d_int16_c;
99  ne10_fft_r2c_1d_int16 = ne10_fft_r2c_1d_int16_c;
100  ne10_fft_c2r_1d_int16 = ne10_fft_c2r_1d_int16_c;
101 
102  ne10_fir_float = ne10_fir_float_c;
103  ne10_fir_decimate_float = ne10_fir_decimate_float_c;
104  ne10_fir_interpolate_float = ne10_fir_interpolate_float_c;
105  ne10_fir_lattice_float = ne10_fir_lattice_float_c;
106  ne10_fir_sparse_float = ne10_fir_sparse_float_c;
107 
108  ne10_iir_lattice_float = ne10_iir_lattice_float_c;
109  }
110  return NE10_OK;
111 }
112 
113 // These are actual definitions of our function pointers that are declared in inc/NE10_dsp.h
114 ne10_fft_cfg_float32_t (*ne10_fft_alloc_c2c_float32) (ne10_int32_t nfft);
115 ne10_fft_cfg_int32_t (*ne10_fft_alloc_c2c_int32) (ne10_int32_t nfft);
116 
117 void (*ne10_fft_c2c_1d_float32) (ne10_fft_cpx_float32_t *fout,
120  ne10_int32_t inverse_fft);
121 
122 void (*ne10_fft_r2c_1d_float32) (ne10_fft_cpx_float32_t *fout,
123  ne10_float32_t *fin,
125 
126 void (*ne10_fft_c2r_1d_float32) (ne10_float32_t *fout,
129 
130 void (*ne10_fft_c2c_1d_int32) (ne10_fft_cpx_int32_t *fout,
133  ne10_int32_t inverse_fft,
134  ne10_int32_t scaled_flag);
135 
136 
137 void (*ne10_fft_r2c_1d_int32) (ne10_fft_cpx_int32_t *fout,
138  ne10_int32_t *fin,
140  ne10_int32_t scaled_flag);
141 
142 void (*ne10_fft_c2r_1d_int32) (ne10_int32_t *fout,
145  ne10_int32_t scaled_flag);
146 
147 void (*ne10_fft_c2c_1d_int16) (ne10_fft_cpx_int16_t *fout,
150  ne10_int32_t inverse_fft,
151  ne10_int32_t scaled_flag);
152 
153 void (*ne10_fft_r2c_1d_int16) (ne10_fft_cpx_int16_t *fout,
154  ne10_int16_t *fin,
156  ne10_int32_t scaled_flag);
157 
158 void (*ne10_fft_c2r_1d_int16) (ne10_int16_t *fout,
161  ne10_int32_t scaled_flag);
162 
163 void (*ne10_fir_float) (const ne10_fir_instance_f32_t * S,
164  ne10_float32_t * pSrc,
165  ne10_float32_t * pDst,
166  ne10_uint32_t blockSize);
167 
168 void (*ne10_fir_decimate_float) (
170  ne10_float32_t * pSrc,
171  ne10_float32_t * pDst,
172  ne10_uint32_t blockSize);
173 
174 void (*ne10_fir_interpolate_float) (
176  ne10_float32_t * pSrc,
177  ne10_float32_t * pDst,
178  ne10_uint32_t blockSize);
179 
180 void (*ne10_fir_lattice_float) (
182  ne10_float32_t * pSrc,
183  ne10_float32_t * pDst,
184  ne10_uint32_t blockSize);
185 
186 void (*ne10_fir_sparse_float) (
188  ne10_float32_t * pSrc,
189  ne10_float32_t * pDst,
190  ne10_float32_t * pScratchIn,
191  ne10_uint32_t blockSize);
192 
193 void (*ne10_iir_lattice_float) (const ne10_iir_lattice_instance_f32_t * S,
194  ne10_float32_t * pSrc,
195  ne10_float32_t * pDst,
196  ne10_uint32_t blockSize);
ne10_fft_state_float32_t
structure for the floating point FFT state
Definition: NE10_types.h:240
ne10_fft_c2r_1d_int32_neon
void ne10_fft_c2r_1d_int32_neon(ne10_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int32 data.
Definition: NE10_fft_int32.neonintrinsic.c:1905
ne10_fft_c2r_1d_float32_c
void ne10_fft_c2r_1d_float32_c(ne10_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 IFFT (complex to real) of float(32-bit) data.
Definition: NE10_fft_float32.c:1305
ne10_fft_c2c_1d_int16_neon
void ne10_fft_c2c_1d_int16_neon(ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 16-bit fixed point data.
Definition: NE10_fft_int16.neonintrinsic.c:1487
ne10_fft_c2c_1d_float32_neon
void ne10_fft_c2c_1d_float32_neon(ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cfg_float32_t cfg, ne10_int32_t inverse_fft)
Mixed radix-2/3/4/5 complex FFT/IFFT of float(32-bit) data.
Definition: NE10_fft_float32.neonintrinsic.c:1459
ne10_fft_r2c_1d_int16_neon
void ne10_fft_r2c_1d_int16_neon(ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int16 data.
Definition: NE10_fft_int16.neonintrinsic.c:1584
ne10_fft_r2c_1d_int16_c
void ne10_fft_r2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int16 data.
Definition: NE10_fft_int16.c:1233
ne10_fir_instance_f32_t
Instance structure for the floating-point FIR filter.
Definition: NE10_types.h:364
ne10_fft_c2r_1d_int16_c
void ne10_fft_c2r_1d_int16_c(ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int16 data.
Definition: NE10_fft_int16.c:1255
ne10_fft_c2r_1d_int16_neon
void ne10_fft_c2r_1d_int16_neon(ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int16 data.
Definition: NE10_fft_int16.neonintrinsic.c:1616
ne10_fft_alloc_c2c_float32_neon
ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_neon(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
Definition: NE10_fft.c:337
ne10_fft_c2c_1d_int16_c
void ne10_fft_c2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 16-bit fixed point data.
Definition: NE10_fft_int16.c:1110
ne10_fft_r2c_1d_int32_c
void ne10_fft_r2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int32 data.
Definition: NE10_fft_int32.c:1219
ne10_fft_state_int32_t
Definition: NE10_types.h:334
ne10_fft_cpx_float32_t
Definition: NE10_types.h:230
ne10_fft_alloc_c2c_int32_neon
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_neon(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
Definition: NE10_fft.c:435
ne10_fft_cpx_int32_t
structure for the 32 bits fixed point FFT function.
Definition: NE10_types.h:328
ne10_fir_lattice_instance_f32_t
Instance structure for the floating point FIR Lattice filter.
Definition: NE10_types.h:374
ne10_fir_sparse_instance_f32_t
Instance structure for the floating-point FIR Sparse filter.
Definition: NE10_types.h:406
ne10_fir_lattice_float_c
void ne10_fir_lattice_float_c(const ne10_fir_lattice_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_uint32_t blockSize)
Processing function for the floating-point FIR lattice filter.
Definition: NE10_fir.c:972
ne10_fft_state_int16_t
Definition: NE10_types.h:303
ne10_fft_alloc_c2c_float32_c
ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_c(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
Definition: NE10_fft_float32.c:997
ne10_fir_sparse_float_c
void ne10_fir_sparse_float_c(ne10_fir_sparse_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_float32_t *pScratchIn, ne10_uint32_t blockSize)
Processing function for the floating-point sparse FIR filter.
Definition: NE10_fir.c:1439
ne10_fft_c2c_1d_int32_c
void ne10_fft_c2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 32-bit fixed point data.
Definition: NE10_fft_int32.c:1072
ne10_fft_r2c_1d_int32_neon
void ne10_fft_r2c_1d_int32_neon(ne10_fft_cpx_int32_t *fout, ne10_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int32 data.
Definition: NE10_fft_int32.neonintrinsic.c:1876
ne10_fir_interpolate_instance_f32_t
Instance structure for the floating-point FIR Interpolation.
Definition: NE10_types.h:395
ne10_fir_interpolate_float_c
void ne10_fir_interpolate_float_c(const ne10_fir_interpolate_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_uint32_t blockSize)
Processing function for the floating-point FIR interpolator.
Definition: NE10_fir.c:712
ne10_fft_c2c_1d_float32_c
void ne10_fft_c2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cfg_float32_t cfg, ne10_int32_t inverse_fft)
Mixed radix-2/3/4/5 complex FFT/IFFT of float(32-bit) data.
Definition: NE10_fft_float32.c:1065
ne10_fft_r2c_state_int32_t
Definition: NE10_types.h:345
ne10_iir_lattice_float_c
void ne10_iir_lattice_float_c(const ne10_iir_lattice_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_uint32_t blockSize)
Processing function for the floating-point IIR lattice filter.
Definition: NE10_iir.c:118
ne10_fft_alloc_c2c_int32_c
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_c(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
Definition: NE10_fft_int32.c:1027
ne10_fft_r2c_state_int16_t
Definition: NE10_types.h:313
ne10_fft_r2c_state_float32_t
Definition: NE10_types.h:272
ne10_fft_c2r_1d_float32_neon
void ne10_fft_c2r_1d_float32_neon(ne10_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 IFFT (complex to real) of float(32-bit) data.
Definition: NE10_rfft_float32.neonintrinsic.c:1752
ne10_fir_decimate_float_c
void ne10_fir_decimate_float_c(const ne10_fir_decimate_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_uint32_t blockSize)
Processing function for the floating-point FIR decimator.
Definition: NE10_fir.c:452
ne10_fft_c2r_1d_int32_c
void ne10_fft_c2r_1d_int32_c(ne10_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int32 data.
Definition: NE10_fft_int32.c:1241
ne10_fir_decimate_instance_f32_t
Instance structure for the floating-point FIR Decimation.
Definition: NE10_types.h:384
ne10_fft_r2c_1d_float32_c
void ne10_fft_r2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 FFT (real to complex) of float(32-bit) data.
Definition: NE10_fft_float32.c:1285
ne10_fft_cpx_int16_t
structure for the 16 bits fixed point FFT function.
Definition: NE10_types.h:297
ne10_fft_r2c_1d_float32_neon
void ne10_fft_r2c_1d_float32_neon(ne10_fft_cpx_float32_t *fout, ne10_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 FFT (real to complex) of float(32-bit) data.
Definition: NE10_rfft_float32.neonintrinsic.c:1717
ne10_iir_lattice_instance_f32_t
Instance structure for the floating point IIR Lattice filter.
Definition: NE10_types.h:419
ne10_fft_c2c_1d_int32_neon
void ne10_fft_c2c_1d_int32_neon(ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 32-bit fixed point data.
Definition: NE10_fft_int32.neonintrinsic.c:1739
ne10_fir_float_c
void ne10_fir_float_c(const ne10_fir_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_uint32_t blockSize)
Definition: NE10_fir.c:121