38 #include "NE10_macros.h"
40 #include "unit_test_common.h"
/* Largest transform length exercised; upper bound of every fftSize loop in this file. */
48 #define TEST_LENGTH_SAMPLES (32768)
/* Starting fftSize for the test_fft_c2c_* loops. */
49 #define MIN_LENGTH_SAMPLES_CPX (4)
/* Starting fftSize for the test_fft_r2c_* loops: twice the c2c minimum. */
50 #define MIN_LENGTH_SAMPLES_REAL (MIN_LENGTH_SAMPLES_CPX*2)
/* Work budget for the performance tests, which run TEST_COUNT / fftSize iterations per size. */
52 #define TEST_COUNT 10000000
/* Reference input shared by all tests; filled with random values in my_test_setup. */
59 static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES * 2];
/*
 * Input buffers. guarded_in_* hold the raw NE10_MALLOC results (these are the
 * pointers released in my_test_teardown); in_* point ARRAY_GUARD_LEN elements
 * past them and are what the tests actually fill.
 */
66 static ne10_float32_t * guarded_in_c = NULL;
67 static ne10_float32_t * guarded_in_neon = NULL;
68 static ne10_float32_t * in_c = NULL;
69 static ne10_float32_t * in_neon = NULL;
/* Output buffers, laid out the same way as the input buffers above. */
71 static ne10_float32_t * guarded_out_c = NULL;
72 static ne10_float32_t * guarded_out_neon = NULL;
73 static ne10_float32_t * out_c = NULL;
74 static ne10_float32_t * out_neon = NULL;
/* Latest CAL_SNR_FLOAT32 result comparing out_c with out_neon; checked against SNR_THRESHOLD. */
76 static ne10_float32_t snr = 0.0f;
/*
 * Timing results reported by the performance tests.
 * NOTE(review): the code that measures time_c / time_neon is not visible in
 * this view; the column headers printed by the tests label them as ms -- confirm.
 */
78 static ne10_int64_t time_c = 0;
79 static ne10_int64_t time_neon = 0;
/* time_c / time_neon, printed as "<ratio>:1". */
80 static ne10_float32_t time_speedup = 0.0f;
/* (time_c - time_neon) / time_c, expressed as a percentage. */
81 static ne10_float32_t time_savings = 0.0f;
/*
 * Helper used by the c2c tests before each size is run; callers compare its
 * result against NE10_ERR.
 */
86 static ne10_int32_t test_c2c_alloc (ne10_int32_t fftSize);
/*
 * Conformance test for the c2c float32 FFT.
 *
 * For every fftSize from MIN_LENGTH_SAMPLES_CPX up to TEST_LENGTH_SAMPLES
 * (doubling each step) the same 2 * fftSize floats of testInput_f32 are copied
 * into in_c and in_neon, the output buffers are guarded, and the SNR between
 * out_c and out_neon must not fall below SNR_THRESHOLD. The sequence is run
 * twice per size.
 *
 * NOTE(review): the calls that actually fill out_c / out_neon are not visible
 * in this view; presumably the first pass is the forward transform and the
 * second the inverse -- confirm.
 */
88 void test_fft_c2c_1d_float32_conformance()
90 ne10_int32_t fftSize = 0;
91 ne10_int32_t flag_result = NE10_OK;
93 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
95 for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
97 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Set up the transform configuration for this size; NE10_ERR signals failure. */
98 flag_result = test_c2c_alloc (fftSize);
99 if (flag_result == NE10_ERR)
/* First pass: identical input for both implementations (2 floats per sample). */
105 memcpy (in_c, testInput_f32, 2 * fftSize *
sizeof (ne10_float32_t));
106 memcpy (in_neon, testInput_f32, 2 * fftSize *
sizeof (ne10_float32_t));
/* Arm the guard regions around the fftSize * 2 output floats ... */
108 GUARD_ARRAY (out_c, fftSize * 2);
109 GUARD_ARRAY (out_neon, fftSize * 2);
/* ... and verify afterwards that nothing wrote outside that range. */
114 CHECK_ARRAY_GUARD (out_c, fftSize * 2);
115 CHECK_ARRAY_GUARD (out_neon, fftSize * 2);
/* The two outputs must agree: fail if the SNR is below the threshold. */
118 snr = CAL_SNR_FLOAT32 (out_c, out_neon, fftSize * 2);
119 assert_false ( (snr < SNR_THRESHOLD));
/* Second pass: inputs are reloaded from testInput_f32 and the same checks repeated. */
122 memcpy (in_c, testInput_f32, 2 * fftSize *
sizeof (ne10_float32_t));
123 memcpy (in_neon, testInput_f32, 2 * fftSize *
sizeof (ne10_float32_t));
125 GUARD_ARRAY (out_c, fftSize * 2);
126 GUARD_ARRAY (out_neon, fftSize * 2);
131 CHECK_ARRAY_GUARD (out_c, fftSize * 2);
132 CHECK_ARRAY_GUARD (out_neon, fftSize * 2);
135 snr = CAL_SNR_FLOAT32 (out_c, out_neon, fftSize * 2);
136 assert_false ( (snr < SNR_THRESHOLD));
/* Release the configuration; cfg_neon is declared outside the lines visible here. */
139 NE10_FREE (cfg_neon);
/*
 * Performance test for the c2c float32 FFT.
 *
 * For every fftSize from MIN_LENGTH_SAMPLES_CPX up to TEST_LENGTH_SAMPLES
 * (doubling each step) two timed stages are run, each consisting of two loops
 * of TEST_COUNT / fftSize iterations, and a result row is logged per stage
 * with the C time, the NEON time, the percentage saved and the speedup ratio.
 *
 * NOTE(review): the loop bodies and the code that sets time_c / time_neon are
 * not visible in this view; presumably one loop times the C implementation and
 * the other the NEON implementation -- confirm.
 */
143 void test_fft_c2c_1d_float32_performance()
146 ne10_int32_t fftSize = 0;
147 ne10_int32_t flag_result = NE10_OK;
148 ne10_int32_t test_loop = 0;
150 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Column headers for the result table. */
151 fprintf (stdout,
"%25s%20s%20s%20s%20s\n",
"FFT Length",
"C Time in ms",
"NEON Time in ms",
"Time Savings",
"Performance Ratio");
153 for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
155 fprintf (stdout,
"FFT size %d\n", fftSize);
/* First stage input: the same reference data for both implementations. */
158 memcpy (in_c, testInput_f32, 2 * fftSize *
sizeof (ne10_float32_t));
159 memcpy (in_neon, testInput_f32, 2 * fftSize *
sizeof (ne10_float32_t));
/* Set up the transform configuration for this size; NE10_ERR signals failure. */
160 flag_result = test_c2c_alloc (fftSize);
161 if (flag_result == NE10_ERR)
/* Keep total work roughly constant: fewer iterations for larger transforms. */
166 test_loop = TEST_COUNT / fftSize;
172 for (i = 0; i < test_loop; i++)
180 for (i = 0; i < test_loop; i++)
/*
 * NOTE(review): both ratios divide by a measured time; if time_neon or time_c
 * is 0 this is a division by zero -- confirm the timer resolution makes that
 * impossible.
 */
185 time_speedup = (ne10_float32_t) time_c / time_neon;
186 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
187 ne10_log (__FUNCTION__,
"Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/*
 * Second stage input: both in_c and in_neon are loaded from out_c, so the two
 * implementations are again timed on identical data.
 */
190 memcpy (in_c, out_c, 2 * fftSize *
sizeof (ne10_float32_t));
191 memcpy (in_neon, out_c, 2 * fftSize *
sizeof (ne10_float32_t));
197 for (i = 0; i < test_loop; i++)
205 for (i = 0; i < test_loop; i++)
210 time_speedup = (ne10_float32_t) time_c / time_neon;
211 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
212 ne10_log (__FUNCTION__,
"Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Release the configuration; cfg_neon is declared outside the lines visible here. */
215 NE10_FREE (cfg_neon);
/*
 * Conformance test for the r2c float32 FFT.
 *
 * For every fftSize from MIN_LENGTH_SAMPLES_REAL up to TEST_LENGTH_SAMPLES
 * (doubling each step) two stages are checked. The first feeds fftSize floats
 * and compares (fftSize / 2 + 1) * 2 output floats; the second rebuilds the
 * input as a mirrored spectrum and compares fftSize output floats. In both
 * stages the SNR between out_c and out_neon must not fall below SNR_THRESHOLD.
 *
 * NOTE(review): the configuration allocation and the transform calls are not
 * visible in this view; presumably stage one is the forward real-to-complex
 * transform and stage two the inverse -- confirm.
 */
219 void test_fft_r2c_1d_float32_conformance()
223 ne10_int32_t fftSize = 0;
226 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
228 for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
230 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Stage one input: fftSize floats of reference data for both implementations. */
233 memcpy (in_c, testInput_f32, fftSize *
sizeof (ne10_float32_t));
234 memcpy (in_neon, testInput_f32, fftSize *
sizeof (ne10_float32_t));
/* Reported when configuration allocation fails (the condition is not visible here). */
238 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Stage one produces fftSize / 2 + 1 pairs of floats; guard exactly that range. */
242 GUARD_ARRAY (out_c, (fftSize / 2 + 1) * 2);
243 GUARD_ARRAY (out_neon, (fftSize / 2 + 1) * 2);
248 CHECK_ARRAY_GUARD (out_c, (fftSize / 2 + 1) * 2);
249 CHECK_ARRAY_GUARD (out_neon, (fftSize / 2 + 1) * 2);
252 snr = CAL_SNR_FLOAT32 (out_c, out_neon, (fftSize / 2 + 1) * 2);
253 assert_false ( (snr < SNR_THRESHOLD));
/*
 * Stage two input: for each pair index i in 1 .. fftSize/2 - 1, copy the pair
 * from testInput_f32 and write a mirrored pair at index fftSize - i with the
 * second component negated (presumably pairs are (re, im), making the data
 * conjugate-symmetric -- confirm).
 */
256 for (i = 1; i < (fftSize / 2); i++)
258 in_c[2 * i] = testInput_f32[2 * i];
259 in_c[2 * i + 1] = testInput_f32[2 * i + 1];
260 in_c[2 * (fftSize - i)] = in_c[2 * i];
261 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
/* Pair 0 and pair fftSize / 2 are set separately; the latter gets a zero second component. */
263 in_c[0] = testInput_f32[0];
265 in_c[fftSize] = testInput_f32[1];
266 in_c[fftSize + 1] = 0;
/* Give the NEON path a copy of the same fftSize * 2 floats. */
267 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (ne10_float32_t));
/* Stage two produces fftSize floats. */
269 GUARD_ARRAY (out_c, fftSize);
270 GUARD_ARRAY (out_neon, fftSize);
275 CHECK_ARRAY_GUARD (out_c, fftSize);
276 CHECK_ARRAY_GUARD (out_neon, fftSize);
279 snr = CAL_SNR_FLOAT32 (out_c, out_neon, fftSize);
280 assert_false ( (snr < SNR_THRESHOLD));
/*
 * Performance test for the r2c float32 FFT.
 *
 * For every fftSize from MIN_LENGTH_SAMPLES_REAL up to TEST_LENGTH_SAMPLES
 * (doubling each step) two timed stages are run, each consisting of two loops
 * of TEST_COUNT / fftSize iterations, and a result row is logged per stage
 * with the C time, the NEON time, the percentage saved and the speedup ratio.
 *
 * NOTE(review): the configuration allocation, the loop bodies and the code
 * that sets time_c / time_neon are not visible in this view; presumably one
 * loop of each stage times the C implementation and the other the NEON
 * implementation -- confirm.
 */
286 void test_fft_r2c_1d_float32_performance()
290 ne10_int32_t fftSize = 0;
292 ne10_int32_t test_loop = 0;
294 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Column headers for the result table. */
295 fprintf (stdout,
"%25s%20s%20s%20s%20s\n",
"FFT Length",
"C Time in ms",
"NEON Time in ms",
"Time Savings",
"Performance Ratio");
297 for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
299 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Stage one input: fftSize floats of reference data for both implementations. */
302 memcpy (in_c, testInput_f32, fftSize *
sizeof (ne10_float32_t));
303 memcpy (in_neon, testInput_f32, fftSize *
sizeof (ne10_float32_t));
/* Reported when configuration allocation fails (the condition is not visible here). */
307 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Keep total work roughly constant: fewer iterations for larger transforms. */
310 test_loop = TEST_COUNT / fftSize;
316 for (i = 0; i < test_loop; i++)
324 for (i = 0; i < test_loop; i++)
/*
 * NOTE(review): both ratios divide by a measured time; if time_neon or time_c
 * is 0 this is a division by zero -- confirm the timer resolution makes that
 * impossible.
 */
329 time_speedup = (ne10_float32_t) time_c / time_neon;
330 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
331 ne10_log (__FUNCTION__,
"Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/*
 * Stage two input: for each pair index i in 1 .. fftSize/2 - 1, copy the pair
 * from testInput_f32 and write a mirrored pair at index fftSize - i with the
 * second component negated (presumably pairs are (re, im), making the data
 * conjugate-symmetric -- confirm).
 */
334 for (i = 1; i < (fftSize / 2); i++)
336 in_c[2 * i] = testInput_f32[2 * i];
337 in_c[2 * i + 1] = testInput_f32[2 * i + 1];
338 in_c[2 * (fftSize - i)] = in_c[2 * i];
339 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
/* Pair 0 and pair fftSize / 2 are set separately; the latter gets a zero second component. */
341 in_c[0] = testInput_f32[0];
343 in_c[fftSize] = testInput_f32[1];
344 in_c[fftSize + 1] = 0;
/* Give the NEON path a copy of the same fftSize * 2 floats. */
345 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (ne10_float32_t));
351 for (i = 0; i < test_loop; i++)
359 for (i = 0; i < test_loop; i++)
364 time_speedup = (ne10_float32_t) time_c / time_neon;
365 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
366 ne10_log (__FUNCTION__,
"Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/*
 * Fixture setup: resets the log buffer pointer, allocates the four guarded
 * input/output buffers and fills testInput_f32 with random data.
 *
 * Each buffer holds TEST_LENGTH_SAMPLES * 2 usable floats plus
 * ARRAY_GUARD_LEN * 2 extra floats; the working pointers start
 * ARRAY_GUARD_LEN floats into the allocation, leaving a guard region on each
 * side.
 *
 * NOTE(review): no NULL check on the NE10_MALLOC results is visible in this
 * view -- confirm allocation failure is handled.
 */
372 static void my_test_setup (
void)
/* Rewind the log write position to the start of the log buffer. */
374 ne10_log_buffer_ptr = ne10_log_buffer;
378 guarded_in_c = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_float32_t));
379 guarded_in_neon = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_float32_t));
/* Skip the leading guard region. */
380 in_c = guarded_in_c + ARRAY_GUARD_LEN;
381 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
384 guarded_out_c = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_float32_t));
385 guarded_out_neon = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_float32_t));
386 out_c = guarded_out_c + ARRAY_GUARD_LEN;
387 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
/*
 * Scale and shift each drand48() sample by 32768 and -16384; with drand48()
 * in [0.0, 1.0) the values fall in [-16384, 16384).
 */
389 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
391 testInput_f32[i] = (ne10_float32_t) (drand48() * 32768.0f - 16384.0f);
/*
 * Fixture teardown: releases the four buffers allocated in my_test_setup.
 * The guarded_* base pointers are freed, not the offset in_* / out_* pointers,
 * because the base pointers are what NE10_MALLOC returned.
 */
395 static void my_test_teardown (
void)
397 NE10_FREE (guarded_in_c);
398 NE10_FREE (guarded_in_neon);
399 NE10_FREE (guarded_out_c);
400 NE10_FREE (guarded_out_neon);
/*
 * Test entry point for the c2c float32 FFT: runs the conformance test and/or
 * the performance test, depending on which build macros are set.
 */
403 void test_fft_c2c_1d_float32()
/*
 * NOTE(review): (REGRESSION_TEST) is not wrapped in defined, so the
 * preprocessor evaluates the macro's value rather than testing whether it
 * exists; a REGRESSION_TEST defined with an empty body would make this
 * expression ill-formed. Probably meant: defined (REGRESSION_TEST) -- confirm.
 */
405 #if defined (SMOKE_TEST)||(REGRESSION_TEST)
406 test_fft_c2c_1d_float32_conformance();
409 #if defined (PERFORMANCE_TEST)
410 test_fft_c2c_1d_float32_performance();
/*
 * Test entry point for the r2c float32 FFT: runs the conformance test and/or
 * the performance test, depending on which build macros are set.
 */
414 void test_fft_r2c_1d_float32()
/*
 * NOTE(review): (REGRESSION_TEST) is not wrapped in defined, so the
 * preprocessor evaluates the macro's value rather than testing whether it
 * exists; a REGRESSION_TEST defined with an empty body would make this
 * expression ill-formed. Probably meant: defined (REGRESSION_TEST) -- confirm.
 */
416 #if defined (SMOKE_TEST)||(REGRESSION_TEST)
417 test_fft_r2c_1d_float32_conformance();
420 #if defined (PERFORMANCE_TEST)
421 test_fft_r2c_1d_float32_performance();
/*
 * Test fixture for the c2c float32 FFT: registers my_test_setup and
 * my_test_teardown as the fixture hooks and runs test_fft_c2c_1d_float32.
 */
425 void test_fixture_fft_c2c_1d_float32 (
void)
427 test_fixture_start();
/* Allocates the shared buffers and generates the random input. */
429 fixture_setup (my_test_setup);
431 run_test (test_fft_c2c_1d_float32);
/* Releases the shared buffers. */
433 fixture_teardown(my_test_teardown);
/*
 * Test fixture for the r2c float32 FFT: registers my_test_setup and
 * my_test_teardown as the fixture hooks and runs test_fft_r2c_1d_float32.
 */
438 void test_fixture_fft_r2c_1d_float32 (
void)
440 test_fixture_start();
/* Allocates the shared buffers and generates the random input. */
442 fixture_setup (my_test_setup);
444 run_test (test_fft_r2c_1d_float32);
/* Releases the shared buffers. */
446 fixture_teardown(my_test_teardown);
451 ne10_int32_t test_c2c_alloc (ne10_int32_t fftSize)
454 NE10_FREE (cfg_neon);
459 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
464 if (cfg_neon == NULL)
467 fprintf (stdout,
"======ERROR, FFT alloc fails\n");