Для сигнала, заданного функцией 2 * cos(5 * i) + 0.7 * sin(3 * i),
я ожидаю получить комплексный массив, содержащий амплитуды и частоты:
…
«3 || cos: 0.0000 || sin: 0.7000»
…
«5 || cos: 2.0000 || sin: 0.0000»
…
Или что-то подобное. Но я получаю совсем другой результат. Подскажите, пожалуйста, почему и где я не прав?
Вот мой код:
#include <cuda_runtime.h>
#include <cufft.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
typedef float2 Complex;
// Transform configuration shared by the whole listing.
// Preprocessor macros are not scoped to a function body, so they are defined
// at file scope, where their visibility matches their actual reach.
#define NX 256    // number of complex samples per transform
#define BATCH 1   // number of transforms passed to cufftPlan1d

// Intentionally empty: kept only so the symbol still exists for any code
// that references it.
void excufft()
{
}
// Question listing (intentionally left as posted): fills an NX-point signal,
// runs an in-place forward C2C FFT with cuFFT and prints bins 0..NX/2.
// Returns 1 on the checked failures, 0 otherwise.
// NOTE(review): this is the version that produces the unexpected output shown
// after the listing; the defects are annotated inline rather than fixed here.
int main()
{
cufftHandle plan;
cufftComplex *data;
// Device buffer for NX*BATCH complex samples; success is checked via
// cudaGetLastError() rather than the return value of cudaMalloc itself.
cudaMalloc((void**)&data, sizeof(cufftComplex)*NX*BATCH);
if (cudaGetLastError() != cudaSuccess) {
fprintf(stderr, "Cuda error: Failed to allocate\n");
return 1;
}
// Host buffer for NX complex samples; the malloc result is not checked and
// the buffer is never freed in this listing.
Complex* h_signal = (Complex*)malloc(sizeof(Complex) * NX);
for (unsigned int i = 0; i < NX; ++i) {
// NOTE(review): the coefficients here (0.3*cos(5i) + 0.7*sin(2i)) differ from
// the signal described in the question text (2*cos(5i) + 0.7*sin(3i)).
h_signal[i].x = 0.3*cos(5 * i)+0.7*sin(2*i);
// Imaginary part is zero: the input is a real signal stored as complex.
h_signal[i].y = 0;
}
// BUG: the third argument of cudaMemcpy is a size in bytes, but NX (an element
// count) is passed, so only NX bytes of the sizeof(Complex)*NX-byte buffer are
// copied; the rest of the device buffer is never written before the FFT runs.
// The answer further down replaces NX with sizeof(cufftComplex)*NX*BATCH.
// NOTE(review): the result is a CUDA runtime status (cudaError_t) but is
// compared with the cuFFT constant CUFFT_SUCCESS; cudaSuccess is the matching one.
if (cudaMemcpy(data, h_signal, NX, cudaMemcpyHostToDevice) != CUFFT_SUCCESS) {
fprintf(stderr, "CUFFT error: cudaMemcpy host to device failed");
return 1;
}
// 1-D complex-to-complex plan of length NX for BATCH transforms.
if (cufftPlan1d(&plan, NX, CUFFT_C2C, BATCH) != CUFFT_SUCCESS) {
fprintf(stderr, "CUFFT error: Plan creation failed");
return 1;
}
// In-place forward transform: input and output are the same device buffer.
if (cufftExecC2C(plan, data, data, CUFFT_FORWARD) != CUFFT_SUCCESS) {
fprintf(stderr, "CUFFT error: ExecC2C Forward failed");
return 1;
}
if (cudaDeviceSynchronize() != cudaSuccess) {
fprintf(stderr, "Cuda error: Failed to synchronize\n");
return 1;
}
// The result pointer aliases h_signal, so the input samples are overwritten:
// first zeroed here, then partially filled by the copy below.
Complex* h_convolved_signal = h_signal;
for (unsigned int i = 0; i < NX; ++i) {
h_convolved_signal[i].x = 0;
h_convolved_signal[i].y = 0;
}
// BUG: same byte-count mistake as the upload; only NX bytes come back, so the
// remaining elements keep the zeros written above (the posted output shows
// zeros from index 32 onward). The return value is also not checked.
cudaMemcpy(h_convolved_signal, data, NX, cudaMemcpyDeviceToHost);
// Prints bins 0..NX/2 inclusive; .x is labelled "cos" and .y is labelled "sin".
for (int i = 0; i<NX / 2 + 1; i++)
printf("%d || cos: %2.4f || sin: %2.4f\n", i, h_convolved_signal[i].x, h_convolved_signal[i].y);
// Releases the plan and the device buffer; on the early-return paths above
// neither of them is released.
cufftDestroy(plan);
cudaFree(data);
return 0;
}
И вот результат вывода:
0 || cos: 0.0669 || sin: 0.0000
1 || cos: 0.1567 || sin: 0.2224
2 || cos: 0.3738 || sin: 0.3182
3 || cos: 0.5919 || sin: 0.2345
4 || cos: 0.6839 || sin: 0.0229
5 || cos: 0.5961 || sin: -0.1905
6 || cos: 0.3796 || sin: -0.2779
7 || cos: 0.1607 || sin: -0.1850
8 || cos: 0.0676 || sin: 0.0380
9 || cos: 0.1558 || sin: 0.2642
10 || cos: 0.3750 || sin: 0.3647
11 || cos: 0.5981 || sin: 0.2832
12 || cos: 0.6951 || sin: 0.0697
13 || cos: 0.6089 || sin: -0.1481
14 || cos: 0.3894 || sin: -0.2387
15 || cos: 0.1653 || sin: -0.1442
16 || cos: 0.0695 || sin: 0.0852
17 || cos: 0.1605 || sin: 0.3194
18 || cos: 0.3877 || sin: 0.4244
19 || cos: 0.6194 || sin: 0.3407
20 || cos: 0.7197 || sin: 0.1198
21 || cos: 0.6285 || sin: -0.1045
22 || cos: 0.3987 || sin: -0.1942
23 || cos: 0.1664 || sin: -0.0889
24 || cos: 0.0719 || sin: 0.1572
25 || cos: 0.1758 || sin: 0.4046
26 || cos: 0.4215 || sin: 0.5107
27 || cos: 0.6657 || sin: 0.4147
28 || cos: 0.7627 || sin: 0.1765
29 || cos: 0.6517 || sin: -0.0556
30 || cos: 0.3970 || sin: -0.1322
31 || cos: 0.1526 || sin: 0.0068
32 || cos: 0.0000 || sin: 0.0000
33 || cos: 0.0000 || sin: 0.0000
34 || cos: 0.0000 || sin: 0.0000
...
Похоже, что я должен сделать дополнительную подготовку сигнала или некоторую постобработку.
Во-первых, в вызовах cudaMemcpy размер задаётся в байтах, поэтому в каждом случае нужно передавать
NX*sizeof(Complex)
вместо просто NX. Во-вторых, использование простых чисел для частоты и амплитуды неправильно. В ходе экспериментов я подобрал такой пример сигнала:
amp * 0.8 * sin(freq * 24.56 * i / 1000)
А значения результата нужно разделить на 100 (в коде ниже — divfreq).
Кстати, matlab fft и cufft — это разные методы, потому что matlab fft работает с действительными числами, а cufft — с комплексными.
И вот правильный код:
#include <cuda_runtime.h>
#include <cufft.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
typedef float2 Complex;
// Transform configuration shared by the whole listing.
// Preprocessor macros are not scoped to a function body, so they are defined
// at file scope, where their visibility matches their actual reach.
#define NX 256    // number of complex samples per transform
#define BATCH 1   // number of transforms passed to cufftPlan1d

// Intentionally empty: kept only so the symbol still exists for any code
// that references it.
void excufft()
{
}
/*
 * Generates an NX-point real test signal (stored as complex samples with a
 * zero imaginary part), runs an in-place forward complex-to-complex FFT on
 * the GPU with cuFFT, and prints the first NX/2 output bins. The real part is
 * printed under the label "cos" and the imaginary part under "sin", each
 * divided by divfreq.
 *
 * Returns 0 on success and 1 if any allocation, copy, synchronisation or
 * cuFFT call fails. Every exit path releases the host buffer, the device
 * buffer and the cuFFT plan.
 */
int main()
{
    // Scaling constants chosen experimentally by the author of the listing.
    float mulampl = 0.8f;    // multiplier applied to both component amplitudes
    float mulfreq = 24.56f;  // multiplier applied to both component frequencies
    float divfreq = 100;     // divisor applied to the printed FFT values

    // One size for the device buffer, the host buffer and both copies, so
    // they can never disagree (cudaMemcpy takes a size in BYTES).
    const size_t count = (size_t)NX * BATCH;
    const size_t bytes = sizeof(cufftComplex) * count;

    cufftHandle plan;
    bool havePlan = false;       // true once cufftPlan1d has succeeded
    cufftComplex *data = NULL;   // device buffer, transformed in place
    Complex *h_signal = NULL;    // host buffer: input first, then the result
    cudaError_t cudaStatus;
    int exitCode = 1;

    // Single-pass block: `break` jumps to the shared cleanup code below.
    do {
        cudaStatus = cudaMalloc((void**)&data, bytes);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "Cuda error: Failed to allocate: %s\n", cudaGetErrorString(cudaStatus));
            data = NULL;
            break;
        }

        h_signal = (Complex*)malloc(bytes);
        if (h_signal == NULL) {
            fprintf(stderr, "Host error: Failed to allocate signal buffer\n");
            break;
        }

        // Every batch receives the same NX-sample signal; with BATCH == 1 this
        // is exactly one pass over i = 0..NX-1.
        for (size_t k = 0; k < count; ++k) {
            const unsigned int i = (unsigned int)(k % NX);
            h_signal[k].x = 17 * mulampl * sin(100 * mulfreq * i / 1000) + 5 * mulampl * cos(70 * mulfreq * i / 1000);
            h_signal[k].y = 0;
        }

        // cudaMemcpy reports a CUDA runtime status, so compare with cudaSuccess.
        cudaStatus = cudaMemcpy(data, h_signal, bytes, cudaMemcpyHostToDevice);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "Cuda error: cudaMemcpy host to device failed: %s\n", cudaGetErrorString(cudaStatus));
            break;
        }

        if (cufftPlan1d(&plan, NX, CUFFT_C2C, BATCH) != CUFFT_SUCCESS) {
            fprintf(stderr, "CUFFT error: Plan creation failed\n");
            break;
        }
        havePlan = true;

        // In-place forward transform: input and output share the device buffer.
        if (cufftExecC2C(plan, data, data, CUFFT_FORWARD) != CUFFT_SUCCESS) {
            fprintf(stderr, "CUFFT error: ExecC2C Forward failed\n");
            break;
        }

        cudaStatus = cudaDeviceSynchronize();
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "Cuda error: Failed to synchronize: %s\n", cudaGetErrorString(cudaStatus));
            break;
        }

        // The host buffer is reused for the result; the copy overwrites all of
        // it, so no zero-fill is needed beforehand.
        cudaStatus = cudaMemcpy(h_signal, data, bytes, cudaMemcpyDeviceToHost);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "Cuda error: cudaMemcpy device to host failed: %s\n", cudaGetErrorString(cudaStatus));
            break;
        }

        // Only the first NX/2 bins of the first transform are printed.
        for (int i = 0; i < NX / 2; i++)
            printf("%d || cos: %2.4f || sin: %2.4f\n", i, h_signal[i].x / divfreq, h_signal[i].y / divfreq);

        exitCode = 0;
    } while (false);

    // Shared cleanup for both the success path and every failure path.
    if (havePlan)
        cufftDestroy(plan);
    if (data != NULL)
        cudaFree(data);
    free(h_signal);
    return exitCode;
}
Вывод:
0 || cos: -0.0197 || sin: 0.0000
1 || cos: -0.0197 || sin: -0.0004
2 || cos: -0.0197 || sin: -0.0007
3 || cos: -0.0197 || sin: -0.0011
4 || cos: -0.0196 || sin: -0.0014
5 || cos: -0.0196 || sin: -0.0018
6 || cos: -0.0196 || sin: -0.0022
7 || cos: -0.0196 || sin: -0.0025
8 || cos: -0.0196 || sin: -0.0029
9 || cos: -0.0196 || sin: -0.0033
10 || cos: -0.0195 || sin: -0.0036
11 || cos: -0.0195 || sin: -0.0040
12 || cos: -0.0194 || sin: -0.0044
13 || cos: -0.0194 || sin: -0.0048
14 || cos: -0.0193 || sin: -0.0052
15 || cos: -0.0193 || sin: -0.0055
16 || cos: -0.0192 || sin: -0.0059
17 || cos: -0.0192 || sin: -0.0063
18 || cos: -0.0191 || sin: -0.0067
19 || cos: -0.0190 || sin: -0.0071
20 || cos: -0.0189 || sin: -0.0075
21 || cos: -0.0189 || sin: -0.0078
22 || cos: -0.0188 || sin: -0.0082
23 || cos: -0.0187 || sin: -0.0087
24 || cos: -0.0186 || sin: -0.0091
25 || cos: -0.0185 || sin: -0.0095
26 || cos: -0.0184 || sin: -0.0099
27 || cos: -0.0183 || sin: -0.0103
28 || cos: -0.0181 || sin: -0.0107
29 || cos: -0.0180 || sin: -0.0111
30 || cos: -0.0179 || sin: -0.0116
31 || cos: -0.0177 || sin: -0.0120
32 || cos: -0.0175 || sin: -0.0125
33 || cos: -0.0174 || sin: -0.0129
34 || cos: -0.0172 || sin: -0.0134
35 || cos: -0.0170 || sin: -0.0138
36 || cos: -0.0168 || sin: -0.0143
37 || cos: -0.0166 || sin: -0.0148
38 || cos: -0.0164 || sin: -0.0153
39 || cos: -0.0161 || sin: -0.0157
40 || cos: -0.0158 || sin: -0.0162
41 || cos: -0.0156 || sin: -0.0167
42 || cos: -0.0153 || sin: -0.0173
43 || cos: -0.0149 || sin: -0.0178
44 || cos: -0.0145 || sin: -0.0183
45 || cos: -0.0141 || sin: -0.0189
46 || cos: -0.0137 || sin: -0.0194
47 || cos: -0.0133 || sin: -0.0200
48 || cos: -0.0128 || sin: -0.0205
49 || cos: -0.0123 || sin: -0.0211
50 || cos: -0.0116 || sin: -0.0217
51 || cos: -0.0110 || sin: -0.0223
52 || cos: -0.0102 || sin: -0.0229
53 || cos: -0.0094 || sin: -0.0236
54 || cos: -0.0085 || sin: -0.0242
55 || cos: -0.0075 || sin: -0.0248
56 || cos: -0.0063 || sin: -0.0255
57 || cos: -0.0049 || sin: -0.0261
58 || cos: -0.0034 || sin: -0.0268
59 || cos: -0.0015 || sin: -0.0274
60 || cos: 0.0007 || sin: -0.0280
61 || cos: 0.0034 || sin: -0.0286
62 || cos: 0.0067 || sin: -0.0292
63 || cos: 0.0110 || sin: -0.0296
64 || cos: 0.0166 || sin: -0.0299
65 || cos: 0.0244 || sin: -0.0299
66 || cos: 0.0361 || sin: -0.0294
67 || cos: 0.0552 || sin: -0.0279
68 || cos: 0.0931 || sin: -0.0237
69 || cos: 0.2029 || sin: -0.0089
70 || cos: 5.0262 || sin: 0.6993
71 || cos: -0.2672 || sin: -0.0812
72 || cos: -0.1410 || sin: -0.0643
73 || cos: -0.1000 || sin: -0.0601
74 || cos: -0.0797 || sin: -0.0590
75 || cos: -0.0674 || sin: -0.0592
76 || cos: -0.0591 || sin: -0.0602
77 || cos: -0.0531 || sin: -0.0617
78 || cos: -0.0484 || sin: -0.0636
79 || cos: -0.0447 || sin: -0.0659
80 || cos: -0.0415 || sin: -0.0684
81 || cos: -0.0388 || sin: -0.0713
82 || cos: -0.0364 || sin: -0.0747
83 || cos: -0.0342 || sin: -0.0784
84 || cos: -0.0321 || sin: -0.0827
85 || cos: -0.0301 || sin: -0.0875
86 || cos: -0.0281 || sin: -0.0930
87 || cos: -0.0260 || sin: -0.0993
88 || cos: -0.0238 || sin: -0.1067
89 || cos: -0.0214 || sin: -0.1153
90 || cos: -0.0187 || sin: -0.1257
91 || cos: -0.0156 || sin: -0.1383
92 || cos: -0.0120 || sin: -0.1540
93 || cos: -0.0074 || sin: -0.1740
94 || cos: -0.0015 || sin: -0.2005
95 || cos: 0.0065 || sin: -0.2374
96 || cos: 0.0183 || sin: -0.2923
97 || cos: 0.0377 || sin: -0.3828
98 || cos: 0.0754 || sin: -0.5605
99 || cos: 0.1835 || sin: -1.0706
100 || cos: 3.5418 || sin: -16.9244
101 || cos: -0.2941 || sin: 1.1851
102 || cos: -0.1622 || sin: 0.5624
103 || cos: -0.1202 || sin: 0.3639
104 || cos: -0.0994 || sin: 0.2660
105 || cos: -0.0871 || sin: 0.2077
106 || cos: -0.0789 || sin: 0.1688
107 || cos: -0.0731 || sin: 0.1409
108 || cos: -0.0688 || sin: 0.1199
109 || cos: -0.0654 || sin: 0.1035
110 || cos: -0.0627 || sin: 0.0902
111 || cos: -0.0605 || sin: 0.0793
112 || cos: -0.0587 || sin: 0.0700
113 || cos: -0.0572 || sin: 0.0620
114 || cos: -0.0560 || sin: 0.0551
115 || cos: -0.0549 || sin: 0.0490
116 || cos: -0.0540 || sin: 0.0435
117 || cos: -0.0532 || sin: 0.0385
118 || cos: -0.0525 || sin: 0.0339
119 || cos: -0.0520 || sin: 0.0297
120 || cos: -0.0515 || sin: 0.0258
121 || cos: -0.0510 || sin: 0.0221
122 || cos: -0.0507 || sin: 0.0186
123 || cos: -0.0504 || sin: 0.0153
124 || cos: -0.0502 || sin: 0.0121
125 || cos: -0.0500 || sin: 0.0090
126 || cos: -0.0499 || sin: 0.0059
127 || cos: -0.0498 || sin: 0.0029
Спасибо всем!
Других решений пока нет …