ESP-IDF Firmware
Firmware architecture and call graph
Loading...
Searching...
No Matches
dsps_fft4r_fc32_ae32.c
Go to the documentation of this file.
1// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "dsps_fft2r.h"
16#include "dsps_fft4r.h"
17#include "dsp_common.h"
18#include "dsp_types.h"
19#include <math.h>
20#include "esp_attr.h"
21
22#include "dsps_fft4r_platform.h"
23
24
25#if (dsps_cplx2real_fc32_ae32_enabled == 1)
// In-place post-processing pass over `data`, hand-written in Xtensa
// floating-point inline assembly.
//
// The first two floats (data[0], data[1]) are replaced by their sum and
// difference. The loop then walks `data` from the front (two complex values
// per iteration, starting at data[2]) and from the back via `ptr_inv`
// (starting 4 floats before data + 2 * fft_points), combines each front value
// with its mirrored back value using a coefficient pair read from `table`,
// scales by 0.5 and writes the results back to the same positions.
// NOTE(review): by its name this converts the output of a complex FFT of
// packed real input into the spectrum of the real signal - confirm against
// the declaration in dsps_fft4r.h.
//
// data       - float buffer, read and written in place. With fft_points a
//              multiple of 4 the accesses stay within data[0 .. 2*fft_points-1].
// fft_points - point count; must be non-zero because it is used as a divisor
//              for wind_step. The loop runs fft_points / 4 times.
// table      - coefficient table; pairs are read at table + wind_step and
//              table + 2 * wind_step, each advancing by 2 * wind_step floats
//              per iteration.
//              NOTE(review): presumably interleaved cos/sin twiddles - verify
//              against the table generator.
// table_size - only used to derive wind_step = table_size / fft_points.
//
// Returns ESP_OK unconditionally; no argument validation is performed here.
//
// NOTE(review): the asm statements carry no clobber lists and pass values to
// each other through f0..f15 and the pointer operands, so the statement order
// below must not be changed.
26esp_err_t dsps_cplx2real_fc32_ae32_(float *data, int fft_points, float *table, int table_size)
27{
28 float *ptr_inv;
29
30 int wind_step = table_size / (fft_points);
31
32 float *win0 = table + wind_step;
33 float *win1 = table + wind_step * 2;
34
// --- First pair: data[0], data[1] -> (sum, difference) ---
37 asm volatile ("const.s f14, 0"); //f14 = 0f; (overwritten with 0.5 below before any instruction here reads f14)
38 asm volatile ("lsi f0, %0, 0" :: "a" (data)); //f0 = *data;
39 asm volatile ("lsi f1, %0, 4" :: "a" (data)); //f1 = *(data + 1);
40 asm volatile ("addx8 %0, %1, %2" : "=a" (ptr_inv) : "a" (fft_points), "a" (data)); //ptr_inv = data + fft_points * 2; (address = data + 8 bytes * fft_points)
41 asm volatile ("add.s f6, f0, f1"); //f6 = f0 + f1;
42 asm volatile ("sub.s f7, f0, f1"); //f7 = f0 - f1;
43
44 asm volatile ("srli %0, %0, 2" : "+a" (fft_points)); //fft_points >>= 2; from here on fft_points is the loop trip count
45
46 asm volatile ("const.s f14, 3"); //f14 = 0.5f; used as the 0.5 scale factor
47 asm volatile ("neg.s f15, f14"); //f15 = -f14; used where the result must also be negated
48
49 asm volatile ("ssi f6, %0, 0" :: "a" (data)); //*data = f6;
50 asm volatile ("ssi f7, %0, 4" :: "a" (data)); //*(data + 1) = f7;
51 asm volatile ("addi %0, %0, -16" : "+a" (ptr_inv)); //ptr_inv -= 4; the pointer is moved back by 4 floats (16 bytes) up front because float load/store does not support a negative immediate offset
52
// --- Main loop: two front values (data[2..5]) and two back values (ptr_inv[0..3]) per iteration ---
53 asm volatile ("loopnez %0, __loop_end_fftr_real_post_proc" :: "a" (fft_points)); //for (k = 0; k < fft_points; k++) {  (fft_points was already shifted right by 2 above)
54 asm volatile ("lsi f1, %0, 12" :: "a" (data)); //f1 = *(data + 3);
55 asm volatile ("lsi f3, %0, 12" :: "a" (ptr_inv)); //f3 = *(ptr_inv + 3);
56 asm volatile ("lsi f0, %0, 8" :: "a" (data)); //f0 = *(data + 2);
57 asm volatile ("lsi f2, %0, 8" :: "a" (ptr_inv)); //f2 = *(ptr_inv + 2);
58
59 asm volatile ("lsi f7, %0, 20" :: "a" (data)); //f7 = *(data + 5);
60 asm volatile ("lsi f9, %0, 4" :: "a" (ptr_inv)); //f9 = *(ptr_inv + 1);
61
62 asm volatile ("lsi f6, %0, 16" :: "a" (data)); //f6 = *(data + 4);
63 asm volatile ("lsi f8, %0, 0" :: "a" (ptr_inv)); //f8 = *ptr_inv;
64 asm volatile ("sub.s f5, f1, f3"); //f5 = f1 - f3;
65 asm volatile ("add.s f4, f0, f2"); //f4 = f0 + f2;
66 asm volatile ("sub.s f11, f7, f9"); //f11 = f7 - f9;
67 asm volatile ("add.s f10, f6, f8"); //f10 = f6 + f8;
68
69 asm volatile ("add.s f1, f1, f3"); //f1 = f1 + f3;
70 asm volatile ("sub.s f0, f0, f2"); //f0 = f0 - f2;
71
72 asm volatile ("lsi f12, %0, 4" :: "a" (win0)); //f12 = *(win0 + 1);
73 asm volatile ("add.s f7, f7, f9"); //f7 = f7 + f9;
74 asm volatile ("sub.s f6, f6, f8"); //f6 = f6 - f8;
75 asm volatile ("lsi f13, %0, 4" :: "a" (win1)); //f13 = *(win1 + 1);
76
77 asm volatile ("mul.s f3, f1, f12"); //f3 = f1 * f12;
78 asm volatile ("mul.s f2, f0, f12"); //f2 = f0 * f12;
79 asm volatile ("lsi f12, %0, 0" :: "a" (win0)); //f12 = *(win0 + 0); f12 is reused now that the products with *(win0 + 1) are done
80
81 asm volatile ("mul.s f9, f7, f13"); //f9 = f7 * f13;
82 asm volatile ("mul.s f8, f6, f13"); //f8 = f6 * f13;
83
84 asm volatile ("lsi f13, %0, 0" :: "a" (win1)); //f13 = *(win1 + 0); f13 is reused now that the products with *(win1 + 1) are done
85
86 asm volatile ("madd.s f3, f0, f12"); //f3 += f0 * f12;
87 asm volatile ("msub.s f2, f1, f12"); //f2 -= f1 * f12;
88 asm volatile ("madd.s f9, f6, f13"); //f9 += f6 * f13;
89 asm volatile ("msub.s f8, f7, f13"); //f8 -= f7 * f13;
90 asm volatile ("addx8 %0, %1, %0" : "+a" (win0) : "a" (wind_step)); //win0 advances by 8 * wind_step bytes, i.e. 2 * wind_step floats
91 asm volatile ("addx8 %0, %1, %0" : "+a" (win1) : "a" (wind_step)); //win1 advances by 8 * wind_step bytes, i.e. 2 * wind_step floats
92 // Here we have tw1: f2,f3 and tw2: f8,f9
93
94 asm volatile ("sub.s f1, f5, f3"); //f1 = f5 - f3;
95 asm volatile ("sub.s f0, f4, f2"); //f0 = f4 - f2;
96
97 asm volatile ("add.s f3, f3, f5"); //f3 = f3 + f5;
98 asm volatile ("add.s f2, f4, f2"); //f2 = f4 + f2;
99
100 asm volatile ("sub.s f7, f11, f9"); //f7 = f11 - f9;
101 asm volatile ("sub.s f6, f10, f8"); //f6 = f10 - f8;
102 asm volatile ("add.s f9, f9, f11"); //f9 = f9 + f11;
103 asm volatile ("add.s f8, f10, f8"); //f8 = f10 + f8;
104
105 asm volatile ("mul.s f1, f1, f14"); //f1 *= f14;
106 asm volatile ("mul.s f0, f0, f14"); //f0 *= f14;
107
108 asm volatile ("mul.s f3, f3, f15"); //f3 *= -f14; (f15 holds -0.5, so this value is scaled and negated)
109 asm volatile ("mul.s f2, f2, f14"); //f2 *= f14;
110
111 asm volatile ("mul.s f7, f7, f14"); //f7 *= f14;
112 asm volatile ("mul.s f6, f6, f14"); //f6 *= f14;
113 asm volatile ("mul.s f9, f9, f15"); //f9 *= -f14; (f15 holds -0.5, so this value is scaled and negated)
114 asm volatile ("mul.s f8, f8, f14"); //f8 *= f14;
115
116 asm volatile ("ssi f1, %0, 12" :: "a" (data)); //*(data + 3) = f1;
117 asm volatile ("ssi f0, %0, 8" :: "a" (data)); //*(data + 2) = f0;
118
119 asm volatile ("ssi f3, %0, 12" :: "a" (ptr_inv)); //*(ptr_inv + 3) = f3;
120 asm volatile ("ssi f2, %0, 8" :: "a" (ptr_inv)); //*(ptr_inv + 2) = f2;
121
122 asm volatile ("ssi f7, %0, 20" :: "a" (data)); //*(data + 5) = f7;
123 asm volatile ("ssi f6, %0, 16" :: "a" (data)); //*(data + 4) = f6;
124 asm volatile ("addi %0, %0, 16" : "+a" (data)); //data += 4; front pointer moves forward by 4 floats
125
126 asm volatile ("ssi f9, %0, 4" :: "a" (ptr_inv)); //*(ptr_inv + 1) = f9;
127 asm volatile ("ssi f8, %0, 0" :: "a" (ptr_inv)); //*ptr_inv = f8;
128 asm volatile ("addi %0, %0, -16" : "+a" (ptr_inv)); //ptr_inv -= 4; back pointer moves backward by 4 floats
129 //}
130 asm volatile ("__loop_end_fftr_real_post_proc: nop"); // loop end label referenced by the loopnez above
131
132 return ESP_OK;
133}
134#endif // dsps_cplx2real_fc32_ae32_enabled
esp_err_t dsps_cplx2real_fc32_ae32_(float *data, int N, float *table, int table_size)
int esp_err_t
Definition esp_err.h:21
#define ESP_OK
Definition esp_err.h:23
static float data[128 *2]
Definition test_fft2r.c:34