Я написал реализацию алгоритма Штрассена-Винограда. Вот мой код
#include "pch.h"#include<iostream>
#include<cstdlib>
#include<cmath>
#include<ctime>
using namespace std;
void Output(int **matrix, int x, int y, int n);
void Matrix_Add(int **a, int **b, int **c, int n, int x1, int y1, int x2, int y2, int x3, int y3);
void Matrix_Sub(int **a, int **b, int **c, int n, int x1, int y1, int x2, int y2, int x3, int y3);
void Matrix_Multiply(int **a, int **b, int **c, int x1, int y1, int x2, int y2, int x3, int y3, int n);
void strassen(int **a, int **b, int **c, int **s1, int **s2, int **s3, int **s4, int **s5, int **s6, int **s7, int **s8, int **m1, int **m2, int **m3, int **m4, int **m5, int **m6, int **m7, int **t1, int **t2, int m, int n, int x1, int y1, int x2, int y2, int x3, int y3);
void Naive_Multiply(int **a, int **b, int **c, int n);
void Fill_Array(int **mas, int size, int worksize);
int Find_Square(int n);
int main()
{
int m;
cout << "Enter the size of matrix: ";
cin >> m;
int n = Find_Square(m);
int **A = new int*[n];
int **B = new int*[n];
int **C = new int*[n];
int **k = new int*[n];
int **s1 = new int*[n];
int **s2 = new int*[n];
int **s3 = new int*[n];
int **s4 = new int*[n];
int **s5 = new int*[n];
int **s6 = new int*[n];
int **s7 = new int*[n];
int **s8 = new int*[n];
int **m1 = new int*[n];
int **m2 = new int*[n];
int **m3 = new int*[n];
int **m4 = new int*[n];
int **m5 = new int*[n];
int **m6 = new int*[n];
int **m7 = new int*[n];
int **t1 = new int*[n];
int **t2 = new int*[n];
for (int i = 0; i < n; i++)
{
A[i] = new int[n];
B[i] = new int[n];
C[i] = new int[n];
k[i] = new int[n];
s1[i] = new int[n / 2];
s2[i] = new int[n / 2];
s3[i] = new int[n / 2];
s4[i] = new int[n / 2];
s5[i] = new int[n / 2];
s6[i] = new int[n / 2];
s7[i] = new int[n / 2];
s8[i] = new int[n / 2];
m1[i] = new int[n / 2];
m2[i] = new int[n / 2];
m3[i] = new int[n / 2];
m4[i] = new int[n / 2];
m5[i] = new int[n / 2];
m6[i] = new int[n / 2];
m7[i] = new int[n / 2];
t1[i] = new int[n / 2];
t2[i] = new int[n / 2];
}
Fill_Array(A, n, m);
Fill_Array(B, n, m);
cout << "First Matrix:" << endl; Output(A, 0, 0, m);
cout << "Second Matrix:" << endl; Output(B, 0, 0, m);
long double begin = clock();
//for (int i = 0; i < 100; i++)
Naive_Multiply(A, B, k, n);
long double end = clock();
cout << "Naive Multiply time: " << (end - begin) / CLOCKS_PER_SEC << endl; Output(k, 0, 0, m);
long double begin2 = clock();
//for (int i = 0; i < 100; i++)
strassen(A, B, C, s1, s2, s3, s4, s5, s6, s7, s8, m1, m2, m3, m4, m5, m6, m7, t1, t2, n, n, 0, 0, 0, 0, 0, 0);
long double end2 = clock();
cout << "Strassen-Winograd time: " << (end2 - begin2) / CLOCKS_PER_SEC << endl; Output(C, 0, 0, m);
for (int i = 0; i < n; i++)
{
delete[] A[i];
delete[] B[i];
delete[] C[i];
delete[] k[i];
delete[] s1[i];
delete[] s2[i];
delete[] s3[i];
delete[] s4[i];
delete[] s5[i];
delete[] s6[i];
delete[] s7[i];
delete[] s8[i];
delete[] m1[i];
delete[] m2[i];
delete[] m3[i];
delete[] m4[i];
delete[] m5[i];
delete[] m6[i];
delete[] m7[i];
delete[] t1[i];
delete[] t2[i];
}
delete[] s1;
delete[] s2;
delete[] s3;
delete[] s4;
delete[] s5;
delete[] s6;
delete[] s7;
delete[] s8;
delete[] m1;
delete[] m2;
delete[] m3;
delete[] m4;
delete[] m5;
delete[] m6;
delete[] m7;
delete[] t1;
delete[] t2;
system("pause");
return 0;
}
int Find_Square(int n) {
int p = 1;
while (pow(2, p) < n)
p++;
return(pow(2, p));
}
void Fill_Array(int **mas, int size, int worksize)
{
for (int i = 0; i < size; i++)
for (int j = 0; j < size; j++)
mas[i][j] = 0;
for (int i = 0; i < worksize; i++)
for (int j = 0; j < worksize; j++)
mas[i][j] = rand() % 10;
}
void strassen(int **a, int **b, int **c, int **s1, int **s2, int **s3, int **s4, int **s5, int **s6, int **s7, int **s8, int **m1, int **m2, int **m3, int **m4, int **m5, int **m6, int **m7, int **t1, int **t2, int m, int n, int x1, int y1, int x2, int y2, int x3, int y3) {
m = n / 2;
if (m != 1)
{
Matrix_Add(a, a, s1, m, x1 + m, y1, x1 + m, y1 + m, n - m, n - m * 2);
Matrix_Sub(s1, a, s2, m, n - m, n - m * 2, x1, y1, n - m, n - m * 2);
Matrix_Sub(a, a, s3, m, x1, y1, x1 + m, y1, n - m, n - m * 2);
Matrix_Sub(a, s2, s4, m, x1, y1 + m, n - m, n - m * 2, n - m, n - m * 2);
Matrix_Sub(b, b, s5, m, x2, y2 + m, x2, y2, n - m, n - m * 2);
Matrix_Sub(b, s5, s6, m, x2 + m, y2 + m, n - m, n - m * 2, n - m, n - m * 2);
Matrix_Sub(b, b, s7, m, x2 + m, y2 + m, x2, y2 + m, n - m, n - m * 2);
Matrix_Sub(s6, b, s8, m, n - m, n - m * 2, x2 + m, y2, n - m, n - m * 2);
strassen(s2, s6, m1, s1, s2, s3, s4, s5, s6, s7, s8, m1, m2, m3, m4, m5, m6, m7, t1, t2, m, m, n - m, n - m * 2, n - m, n - m * 2, n - m, n - m * 2);
strassen(a, b, m2, s1, s2, s3, s4, s5, s6, s7, s8, m1, m2, m3, m4, m5, m6, m7, t1, t2, m, m, x1, y1, x2, y2, n - m, n - m * 2);
strassen(a, b, m3, s1, s2, s3, s4, s5, s6, s7, s8, m1, m2, m3, m4, m5, m6, m7, t1, t2, m, m, x1, y1 + m, x2 + m, y2, n - m, n - m * 2);
strassen(s3, s7, m4, s1, s2, s3, s4, s5, s6, s7, s8, m1, m2, m3, m4, m5, m6, m7, t1, t2, m, m, n - m, n - m * 2, n - m, n - m * 2, n - m, n - m * 2);
strassen(s1, s5, m5, s1, s2, s3, s4, s5, s6, s7, s8, m1, m2, m3, m4, m5, m6, m7, t1, t2, m, m, n - m, n - m * 2, n - m, n - m * 2, n - m, n - m * 2);
strassen(s4, b, m6, s1, s2, s3, s4, s5, s6, s7, s8, m1, m2, m3, m4, m5, m6, m7, t1, t2, m, m, n - m, n - m * 2, x2 + m, y2 + m, n - m, n - m * 2);
strassen(a, s8, m7, s1, s2, s3, s4, s5, s6, s7, s8, m1, m2, m3, m4, m5, m6, m7, t1, t2, m, m, x1 + m, y1 + m, n - m, n - m * 2, n - m, n - m * 2);Matrix_Add(m1, m2, t1, m, n - m, n - m * 2, n - m, n - m * 2, n - m, n - m * 2);
Matrix_Add(t1, m4, t2, m, n - m, n - m * 2, n - m, n - m * 2, n - m, n - m * 2);
Matrix_Add(m2, m3, c, m, n - m, n - m * 2, n - m, n - m * 2, x3, y3);
Matrix_Sub(t2, m7, c, m, n - m, n - m * 2, n - m, n - m * 2, x3 + m, y3);
Matrix_Add(t1, m5, c, m, n - m, n - m * 2, n - m, n - m * 2, x3, y3 + m);
Matrix_Add(c, m6, c, m, x3, y3 + m, n - m, n - m * 2, x3, y3 + m);
Matrix_Add(t2, m5, c, m, n - m, n - m * 2, n - m, n - m * 2, x3 + m, y3 + m);
}
else
{
Matrix_Multiply(a, b, c, x1, y1, x2, y2, x3, y3, n);
}
}
void Output(int **matrix, int x, int y, int n)
{
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
cout << matrix[i + x][j + y] << " ";
}
cout << endl;
}
cout << endl;
}
void Matrix_Add(int **a, int **b, int **c, int n, int x1, int y1, int x2, int y2, int x3, int y3)
{
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
c[i + x3][j + y3] = a[i + x1][j + y1] + b[i + x2][j + y2];
}
}
}
void Matrix_Sub(int **a, int **b, int **c, int n, int x1, int y1, int x2, int y2, int x3, int y3)
{
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
c[i + x3][j + y3] = a[i + x1][j + y1] - b[i + x2][j + y2];
}
}
}
void Matrix_Multiply(int **a, int **b, int **c, int x1, int y1, int x2, int y2, int x3, int y3, int n)
{
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
c[i + x3][j + y3] = 0;
for (int t = 0; t < n; t++) {
c[i + x3][j + y3] = c[i + x3][j + y3] + a[x1 + i][y1 + t] * b[x2 + t][y2 + j];
}
}
}
}
void Naive_Multiply(int **a, int **b, int **c, int n)
{
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
c[i][j] = 0;
for (int t = 0; t < n; t++) {
c[i][j] = c[i][j] + a[i][t] * b[t][j];
}
}
}
}
Как видите, в этой реализации нет копирующих частей матрицы и рекурсивного распределения памяти. Но работает медленно. Только при n = 1024 оно становится быстрее. Что не так с моим алгоритмом? Я думал, что это должно быть быстрее при n = 64 или 128, что-то в этом роде, но не в 1024.
Задача ещё не решена.
Других решений пока нет …