32 #include "NE10_types.h"
/*
 * ne10_divc_float_neon: takes a destination array, a source array, a scalar
 * constant and an element count (see the signature below). The arithmetic
 * visible here multiplies the source lanes by a refined reciprocal of the
 * constant lanes, i.e. it approximates src / cst without a divide instruction.
 * The result is an approximation and need not be bit-identical to a true
 * floating point division.
 *
 * NOTE(review): the braces and the punctuation of the
 * NE10_XC_OPERATION_FLOAT_NEON argument list are not visible in this listing.
 * The two statement groups below are presumably the two code arguments of that
 * macro, one written for float32x4_t and one for float32x2_t - confirm against
 * the macro definition.
 * NOTE(review): n_cst, n_src, n_dst, n_tmp_cst and n_tmp_src are not declared
 * in the visible lines; presumably the macro provides them - confirm.
 * NOTE(review): rec depends only on the constant operand. If the macro expands
 * this code inside a loop, the reciprocal is recomputed on every iteration.
 */
41 ne10_result_t ne10_divc_float_neon (ne10_float32_t * dst, ne10_float32_t * src,
const ne10_float32_t cst, ne10_uint32_t count)
43 NE10_XC_OPERATION_FLOAT_NEON
/* 4-lane form: start from the low precision reciprocal estimate of n_cst. */
46 float32x4_t rec = vrecpeq_f32 (n_cst);
/*
 * Two Newton-Raphson refinements. vrecpsq_f32 (d, x) yields (2 - d * x), so
 * each line below computes x * (2 - d * x).
 */
47 rec = vmulq_f32 (vrecpsq_f32 (n_cst, rec), rec);
48 rec = vmulq_f32 (vrecpsq_f32 (n_cst, rec), rec);
/* Multiply by the refined reciprocal in place of dividing by n_cst. */
49 n_dst = vmulq_f32 (n_src , rec);
/* 2-lane form of the same estimate, refine, multiply sequence. */
52 float32x2_t rec = vrecpe_f32 (n_tmp_cst);
53 rec = vmul_f32 (vrecps_f32 (n_tmp_cst, rec), rec);
54 rec = vmul_f32 (vrecps_f32 (n_tmp_cst, rec), rec);
/* Here the product is written back into n_tmp_src rather than a separate destination. */
55 n_tmp_src = vmul_f32 (n_tmp_src, rec);
/*
 * Code passed to NE10_XC_OPERATION_VEC2F_NEON: approximates a division of the
 * source lanes by the constant lanes by multiplying with a refined reciprocal.
 *
 * NOTE(review): the enclosing function signature, the braces and the macro
 * argument punctuation are not visible in this listing. The two statement
 * groups are presumably the two code arguments of the macro - confirm against
 * the macro definition.
 * NOTE(review): n_cst, n_src, n_dst, n_tmp_cst and n_tmp_src are not declared
 * in the visible lines; presumably the macro provides them - confirm.
 * NOTE(review): rec depends only on the constant operand. If the macro expands
 * this code inside a loop, the reciprocal is recomputed on every iteration.
 */
61 NE10_XC_OPERATION_VEC2F_NEON
/* 4-lane form: low precision reciprocal estimate of n_cst. */
64 float32x4_t rec = vrecpeq_f32 (n_cst);
/*
 * Two Newton-Raphson refinements. vrecpsq_f32 (d, x) yields (2 - d * x), so
 * each line below computes x * (2 - d * x).
 */
65 rec = vmulq_f32 (vrecpsq_f32 (n_cst, rec), rec);
66 rec = vmulq_f32 (vrecpsq_f32 (n_cst, rec), rec);
/* Multiply by the refined reciprocal in place of dividing by n_cst. */
67 n_dst = vmulq_f32 (n_src , rec);
/* 2-lane form of the same estimate, refine, multiply sequence. */
70 float32x2_t rec = vrecpe_f32 (n_tmp_cst);
71 rec = vmul_f32 (vrecps_f32 (n_tmp_cst, rec), rec);
72 rec = vmul_f32 (vrecps_f32 (n_tmp_cst, rec), rec);
/* Here the product is written back into n_tmp_src rather than a separate destination. */
73 n_tmp_src = vmul_f32 (n_tmp_src, rec);
/*
 * Code passed to NE10_XC_OPERATION_VEC3F_NEON: approximates a division by
 * multiplying with a refined reciprocal, repeated for three register sets.
 * Each set has its own constant register (n_cst1, n_cst2, n_cst3 in the 4-lane
 * form, n_tmp_cst.val[0..2] in the 2-lane form), so a separate reciprocal is
 * computed for each one and the rec variable is reused between them.
 *
 * NOTE(review): the enclosing function signature, the braces and the macro
 * argument punctuation are not visible in this listing. The 4-lane and 2-lane
 * groups are presumably the two code arguments of the macro - confirm against
 * the macro definition.
 * NOTE(review): presumably the three constant registers differ because the
 * x, y and z components are laid out across them - confirm how the macro
 * loads n_cst1..3 and n_tmp_cst.
 * NOTE(review): every rec value depends only on a constant operand. If the
 * macro expands this code inside a loop, the reciprocals are recomputed on
 * every iteration.
 */
79 NE10_XC_OPERATION_VEC3F_NEON
/*
 * 4-lane form, first register set: reciprocal estimate followed by two
 * Newton-Raphson refinements. vrecpsq_f32 (d, x) yields (2 - d * x), so each
 * refinement line computes x * (2 - d * x).
 */
82 float32x4_t rec = vrecpeq_f32 (n_cst1);
83 rec = vmulq_f32 (vrecpsq_f32 (n_cst1, rec), rec);
84 rec = vmulq_f32 (vrecpsq_f32 (n_cst1, rec), rec);
/* Multiply by the refined reciprocal in place of dividing by n_cst1. */
85 n_dst1 = vmulq_f32 (n_src1 , rec);
/* Second register set: same sequence with n_cst2, n_src2 and n_dst2. */
87 rec = vrecpeq_f32 (n_cst2);
88 rec = vmulq_f32 (vrecpsq_f32 (n_cst2, rec), rec);
89 rec = vmulq_f32 (vrecpsq_f32 (n_cst2, rec), rec);
90 n_dst2 = vmulq_f32 (n_src2 , rec);
/* Third register set: same sequence with n_cst3, n_src3 and n_dst3. */
92 rec = vrecpeq_f32 (n_cst3);
93 rec = vmulq_f32 (vrecpsq_f32 (n_cst3, rec), rec);
94 rec = vmulq_f32 (vrecpsq_f32 (n_cst3, rec), rec);
95 n_dst3 = vmulq_f32 (n_src3 , rec);
/*
 * 2-lane form, element 0 of the register arrays: same estimate, refine,
 * multiply sequence. The product is written back into n_tmp_src.
 */
98 float32x2_t rec = vrecpe_f32 (n_tmp_cst.val[0]);
99 rec = vmul_f32 (vrecps_f32 (n_tmp_cst.val[0], rec), rec);
100 rec = vmul_f32 (vrecps_f32 (n_tmp_cst.val[0], rec), rec);
101 n_tmp_src.val[0] = vmul_f32 (n_tmp_src.val[0] , rec);
/* Element 1 of the register arrays. */
103 rec = vrecpe_f32 (n_tmp_cst.val[1]);
104 rec = vmul_f32 (vrecps_f32 (n_tmp_cst.val[1], rec), rec);
105 rec = vmul_f32 (vrecps_f32 (n_tmp_cst.val[1], rec), rec);
106 n_tmp_src.val[1] = vmul_f32 (n_tmp_src.val[1] , rec);
/* Element 2 of the register arrays. */
108 rec = vrecpe_f32 (n_tmp_cst.val[2]);
109 rec = vmul_f32 (vrecps_f32 (n_tmp_cst.val[2], rec), rec);
110 rec = vmul_f32 (vrecps_f32 (n_tmp_cst.val[2], rec), rec);
111 n_tmp_src.val[2] = vmul_f32 (n_tmp_src.val[2] , rec);
/*
 * Code passed to NE10_XC_OPERATION_VEC4F_NEON: approximates a division of the
 * source lanes by the constant lanes by multiplying with a refined reciprocal.
 * Only a 4-lane statement group is visible for this macro.
 *
 * NOTE(review): the enclosing function signature, the braces and the macro
 * argument punctuation are not visible in this listing, and the listing may
 * end before the invocation does - confirm against the full file.
 * NOTE(review): n_cst, n_src and n_dst are not declared in the visible lines;
 * presumably the macro provides them - confirm.
 * NOTE(review): rec depends only on the constant operand. If the macro expands
 * this code inside a loop, the reciprocal is recomputed on every iteration.
 */
117 NE10_XC_OPERATION_VEC4F_NEON
/* Low precision reciprocal estimate of n_cst. */
120 float32x4_t rec = vrecpeq_f32 (n_cst);
/*
 * Two Newton-Raphson refinements. vrecpsq_f32 (d, x) yields (2 - d * x), so
 * each line below computes x * (2 - d * x).
 */
121 rec = vmulq_f32 (vrecpsq_f32 (n_cst, rec), rec);
122 rec = vmulq_f32 (vrecpsq_f32 (n_cst, rec), rec);
/* Multiply by the refined reciprocal in place of dividing by n_cst. */
123 n_dst = vmulq_f32 (n_src , rec);